[llvm-branch-commits] [llvm] release/22.x: [AArch64] Add new pass after VirtRegRewriter to add implicit-defs (#174188) (PR #176197)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jan 15 08:25:34 PST 2026


https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/176197

Backport 9fc7c429752ed87a36f383ee47bad575fea7702a 0133247567a2e69e107bcdd4b1d72fe93b7f93f9 91f5d73b311f3622517ff1d34d21cc8ef1f52ea9

Requested by: @sdesmalen-arm

>From 9e10773097b1d38f2ef604fc861ed1db7e643e8c Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 13 Jan 2026 22:13:31 +0000
Subject: [PATCH 1/3] [AArch64] Let LoadStoreOptimizer handle renamable
 implicit-defs. (#174186)

The LoadStoreOptimizer is very conservative when handling instructions
that have implicit-def operands and only supports them for 2
instructions (ORRWrs and ADDWri). However, an implicit-def on any other
instruction can also be handled when its MachineOperand is explicitly
marked as 'renamable'.

(cherry picked from commit 9fc7c429752ed87a36f383ee47bad575fea7702a)
---
 .../AArch64/AArch64LoadStoreOptimizer.cpp     | 10 +++----
 llvm/test/CodeGen/AArch64/ldst-implicitop.mir | 29 +++++++++++++++++++
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 45599de6a4828..3d9444c0c5426 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -833,10 +833,10 @@ static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) {
   }
 }
 
-static bool isRewritableImplicitDef(unsigned Opc) {
-  switch (Opc) {
+static bool isRewritableImplicitDef(const MachineOperand &MO) {
+  switch (MO.getParent()->getOpcode()) {
   default:
-    return false;
+    return MO.isRenamable();
   case AArch64::ORRWrs:
   case AArch64::ADDWri:
     return true;
@@ -1047,7 +1047,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
                         MI.getRegClassConstraint(OpIdx, TII, TRI))
                   MatchingReg = GetMatchingSubReg(RC);
                 else {
-                  if (!isRewritableImplicitDef(MI.getOpcode()))
+                  if (!isRewritableImplicitDef(MOP))
                     continue;
                   MatchingReg = GetMatchingSubReg(
                       TRI->getMinimalPhysRegClass(MOP.getReg()));
@@ -1739,7 +1739,7 @@ static bool canRenameMOP(const MachineOperand &MOP,
     // them must be known. For example, in ORRWrs the implicit-def
     // corresponds to the result register.
     if (MOP.isImplicit() && MOP.isDef()) {
-      if (!isRewritableImplicitDef(MOP.getParent()->getOpcode()))
+      if (!isRewritableImplicitDef(MOP))
         return false;
       return TRI->isSuperOrSubRegisterEq(
           MOP.getParent()->getOperand(0).getReg(), MOP.getReg());
diff --git a/llvm/test/CodeGen/AArch64/ldst-implicitop.mir b/llvm/test/CodeGen/AArch64/ldst-implicitop.mir
index 34e8cf282669c..482ae5894a5d8 100644
--- a/llvm/test/CodeGen/AArch64/ldst-implicitop.mir
+++ b/llvm/test/CodeGen/AArch64/ldst-implicitop.mir
@@ -78,3 +78,32 @@ body:             |
     $q1 = ORRv16i8 $q5, killed $q5
     RET_ReallyLR
 ...
+# Test that when the implicit-def is renamable, the loads/stores can still be
+# bundled together.
+---
+name:            impdef_renamable
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: '', type: default, offset: -8, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -8, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+  - { id: 1, name: '', type: default, offset: -16, size: 8, alignment: 8,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      local-offset: -16, debug-info-variable: '', debug-info-expression: '',
+      debug-info-location: '' }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: impdef_renamable
+    ; CHECK: early-clobber $sp, renamable $w8, $w9 = frame-setup LDPWpre $sp, -4 :: (load (s32) from %stack.1 + 4), (load (s32) from %stack.1, align 8)
+    ; CHECK-NEXT: STPWi killed renamable $w8, killed $w9, $sp, 2 :: (store (s32) into %stack.0 + 4), (store (s32) into %stack.0, align 8)
+    ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+    ; CHECK-NEXT: RET undef $lr
+    $sp = frame-setup SUBXri $sp, 16, 0
+    renamable $w8 = LDRWui $sp, 1, implicit-def renamable $x8 :: (load (s32) from %stack.1 + 4)
+    STRWui killed renamable $w8, $sp, 3 :: (store (s32) into %stack.0 + 4)
+    renamable $w8 = LDRWui $sp, 0, implicit-def renamable $x8 :: (load (s32) from %stack.1, align 8)
+    STRWui killed renamable $w8, $sp, 2 :: (store (s32) into %stack.0, align 8)
+    $sp = frame-destroy ADDXri $sp, 16, 0
+    RET undef $lr
+...

>From e7abb793f7c5ec6c5fef8cf762f486817c6f5b13 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 13 Jan 2026 22:12:56 +0000
Subject: [PATCH 2/3] [AArch64] Enable subreg liveness tracking for streaming
 functions. (#174189)

Subreg liveness tracking will mostly be of use for streaming SME2
functions, where it can use the strided and contiguous forms of the
multi-vector LD1; see #123081 for details.

Any regressions come from disabling coalescing of SUBREG_TO_REG when
sub-reg liveness tracking is enabled, which may introduce some scalar
`mov`s and may limit some peephole optimizations from the load-store
optimizer, but the impact of disabling coalescing seems limited in
practice (empirically from trying this on workloads) and we think the
regressions are offset by improvements to the handling of multi-vector
instructions (tuple registers) in SME2.

These issues are addressed separately in PR #174188.

(cherry picked from commit 0133247567a2e69e107bcdd4b1d72fe93b7f93f9)
---
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  |  12 +-
 llvm/test/CodeGen/AArch64/active_lane_mask.ll |  19 +-
 llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll     |  12 +-
 .../AArch64/get-active-lane-mask-extract.ll   |  21 +-
 .../AArch64/intrinsic-cttz-elts-sve.ll        |  68 +-
 .../sme-avoid-coalescing-locally-streaming.ll |   3 -
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   |   4 +-
 .../CodeGen/AArch64/sme-intrinsics-stores.ll  |   4 +-
 ...ate-sm-changing-call-disable-coalescing.ll |  50 +-
 .../CodeGen/AArch64/sme-streaming-body.ll     |   2 -
 .../AArch64/sme2-intrinsics-add-sub-za16.ll   |  24 -
 .../CodeGen/AArch64/sme2-intrinsics-bfmul.ll  |  18 -
 .../AArch64/sme2-intrinsics-bfscale.ll        |  18 -
 .../CodeGen/AArch64/sme2-intrinsics-cvtn.ll   |   4 -
 .../AArch64/sme2-intrinsics-faminmax.ll       | 156 ++--
 .../CodeGen/AArch64/sme2-intrinsics-fclamp.ll |  18 -
 .../CodeGen/AArch64/sme2-intrinsics-fmlas.ll  | 100 +--
 .../CodeGen/AArch64/sme2-intrinsics-fscale.ll |  54 --
 .../AArch64/sme2-intrinsics-luti4-lane-x4.ll  |   4 +-
 .../CodeGen/AArch64/sme2-intrinsics-luti4.ll  |   8 +-
 .../CodeGen/AArch64/sme2-intrinsics-max.ll    | 400 +++++-----
 .../CodeGen/AArch64/sme2-intrinsics-min.ll    | 400 +++++-----
 .../CodeGen/AArch64/sme2-intrinsics-mlall.ll  | 244 +++---
 .../CodeGen/AArch64/sme2-intrinsics-mlals.ll  | 193 +----
 .../AArch64/sme2-intrinsics-mop4-fp8.ll       |   8 -
 .../AArch64/sme2-intrinsics-mop4a_2x1.ll      |  64 --
 .../AArch64/sme2-intrinsics-mop4a_2x2.ll      |  82 --
 .../CodeGen/AArch64/sme2-intrinsics-rshl.ll   | 208 ++---
 .../CodeGen/AArch64/sme2-intrinsics-sclamp.ll |  24 -
 .../sme2-intrinsics-select-sme-tileslice.ll   |   2 -
 .../AArch64/sme2-intrinsics-sqdmulh.ll        | 104 +--
 .../CodeGen/AArch64/sme2-intrinsics-sub.ll    |  60 --
 .../CodeGen/AArch64/sme2-intrinsics-tmop.ll   |  26 -
 .../CodeGen/AArch64/sme2-intrinsics-uclamp.ll |  24 -
 .../CodeGen/AArch64/sme2p2-intrinsics-fmul.ll |  54 --
 .../streaming-compatible-memory-ops.ll        |   1 +
 .../CodeGen/AArch64/sve-bf16-reductions.ll    |   8 +-
 .../sve-fixed-length-partial-reduce.ll        |  38 -
 llvm/test/CodeGen/AArch64/sve-fmsub.ll        |  52 +-
 .../sve-intrinsics-contiguous-prefetches.ll   |   9 +-
 ...e-intrinsics-ldN-sret-reg+imm-addr-mode.ll |  10 +-
 .../AArch64/sve-intrinsics-reinterpret.ll     |   7 +-
 .../CodeGen/AArch64/sve-intrinsics-sqdec.ll   |  28 +-
 .../CodeGen/AArch64/sve-intrinsics-sqinc.ll   |  28 +-
 .../sve-intrinsics-stN-reg-imm-addr-mode.ll   | 124 +--
 .../sve-intrinsics-stN-reg-reg-addr-mode.ll   |  67 +-
 .../CodeGen/AArch64/sve-intrinsics-stores.ll  |  85 +--
 .../CodeGen/AArch64/sve-intrinsics-while.ll   |  12 +-
 .../sve-streaming-mode-cvt-fp-int-fp.ll       |  20 +-
 .../sve-streaming-mode-cvt-fp-to-int.ll       |  16 +-
 .../sve-streaming-mode-cvt-int-to-fp.ll       |  16 +-
 ...streaming-mode-fixed-length-and-combine.ll |  40 +-
 ...treaming-mode-fixed-length-bit-counting.ll |  78 +-
 ...e-streaming-mode-fixed-length-bitselect.ll |   8 +-
 ...treaming-mode-fixed-length-build-vector.ll |  26 +-
 .../sve-streaming-mode-fixed-length-concat.ll |  42 +-
 ...e-streaming-mode-fixed-length-ext-loads.ll |  32 +-
 ...ing-mode-fixed-length-extract-subvector.ll |  32 +-
 ...ng-mode-fixed-length-extract-vector-elt.ll |  21 +-
 ...e-streaming-mode-fixed-length-fcopysign.ll |  69 +-
 ...ve-streaming-mode-fixed-length-fp-arith.ll | 138 +---
 ...streaming-mode-fixed-length-fp-compares.ll |  25 +-
 ...aming-mode-fixed-length-fp-extend-trunc.ll |  21 +-
 ...e-streaming-mode-fixed-length-fp-minmax.ll |  66 +-
 ...streaming-mode-fixed-length-fp-rounding.ll |  90 +--
 ...e-streaming-mode-fixed-length-fp-select.ll |  26 +-
 ...-streaming-mode-fixed-length-fp-vselect.ll |  46 +-
 ...e-streaming-mode-fixed-length-int-arith.ll | 148 +---
 ...treaming-mode-fixed-length-int-compares.ll |  30 +-
 ...sve-streaming-mode-fixed-length-int-div.ll | 231 +++---
 ...streaming-mode-fixed-length-int-extends.ll | 315 ++++----
 ...sve-streaming-mode-fixed-length-int-log.ll |  78 +-
 ...-streaming-mode-fixed-length-int-minmax.ll | 102 +--
 ...sve-streaming-mode-fixed-length-int-mul.ll |   9 +-
 ...ve-streaming-mode-fixed-length-int-mulh.ll | 188 ++---
 ...-streaming-mode-fixed-length-int-reduce.ll |  41 +-
 ...sve-streaming-mode-fixed-length-int-rem.ll | 210 ++---
 ...-streaming-mode-fixed-length-int-select.ll |  39 +-
 ...-streaming-mode-fixed-length-int-shifts.ll | 428 ++++++++---
 ...streaming-mode-fixed-length-int-vselect.ll |  69 +-
 ...-streaming-mode-fixed-length-ld2-alloca.ll |  14 +-
 ...reaming-mode-fixed-length-limit-duplane.ll |   8 +-
 .../sve-streaming-mode-fixed-length-loads.ll  |   9 +-
 ...-streaming-mode-fixed-length-log-reduce.ll |  33 +-
 ...mode-fixed-length-masked-gather-scatter.ll |  10 +-
 ...eaming-mode-fixed-length-optimize-ptrue.ll |   8 +-
 ...streaming-mode-fixed-length-permute-rev.ll |   7 +-
 .../sve-streaming-mode-fixed-length-ptest.ll  |  52 +-
 ...-streaming-mode-fixed-length-reductions.ll |  28 +-
 ...e-streaming-mode-fixed-length-reshuffle.ll |   5 +-
 .../sve-streaming-mode-fixed-length-rev.ll    |  40 +-
 ...e-streaming-mode-fixed-length-sdiv-pow2.ll |  26 +-
 ...sve-streaming-mode-fixed-length-shuffle.ll |  20 +-
 ...treaming-mode-fixed-length-trunc-stores.ll |   8 +-
 .../sve-streaming-mode-fixed-length-trunc.ll  | 720 +++++++++---------
 ...eaming-mode-fixed-length-vector-shuffle.ll |  34 +-
 .../AArch64/sve-vector-deinterleave.ll        |  91 +--
 .../CodeGen/AArch64/sve-vector-interleave.ll  |  69 +-
 .../CodeGen/AArch64/sve2-intrinsics-luti.ll   |  12 +-
 .../CodeGen/AArch64/sve2-intrinsics-while.ll  |  10 +-
 .../AArch64/sve2p1-dots-partial-reduction.ll  |  12 +-
 .../AArch64/sve2p1-intrinsics-crypto.ll       |  30 +-
 .../sve2p1-intrinsics-multivec-stores.ll      | 161 +---
 .../AArch64/sve2p1-intrinsics-selx4.ll        | 112 +--
 .../AArch64/sve2p1-intrinsics-stores.ll       | 102 +--
 .../AArch64/sve2p1-intrinsics-uzpx4.ll        |  20 +-
 .../AArch64/sve2p1-intrinsics-while-pp.ll     |  40 +-
 107 files changed, 2294 insertions(+), 5048 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 92a7412e83fac..4d326dc97ca51 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -400,7 +400,17 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
   if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
     ReserveXRegisterForRA.set(29);
 
-  EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
+  // To benefit from SME2's strided-register multi-vector load/store
+  // instructions we'll need to enable subreg liveness. Our longer
+  // term aim is to make this the default, regardless of streaming
+  // mode, but there are still some outstanding issues, see:
+  //  https://github.com/llvm/llvm-project/pull/174188
+  // and:
+  //  https://github.com/llvm/llvm-project/pull/168353
+  if (IsStreaming)
+    EnableSubregLiveness = true;
+  else
+    EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
 }
 
 const CallLowering *AArch64Subtarget::getCallLowering() const {
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index b77e90f6fdc45..44b90dfb73703 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STREAMING
 
 ; == Scalable ==
 
@@ -209,7 +209,6 @@ define <16 x i1> @lane_mask_v16i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, w0, w1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
   ret <16 x i1> %active.lane.mask
@@ -220,7 +219,6 @@ define <8 x i1> @lane_mask_v8i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, w0, w1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
   ret <8 x i1> %active.lane.mask
@@ -231,7 +229,6 @@ define <4 x i1> @lane_mask_v4i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.h, w0, w1
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
   ret <4 x i1> %active.lane.mask
@@ -242,7 +239,6 @@ define <2 x i1> @lane_mask_v2i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.s, w0, w1
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %TC)
   ret <2 x i1> %active.lane.mask
@@ -253,7 +249,6 @@ define <16 x i1> @lane_mask_v16i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, x0, x1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %index, i64 %TC)
   ret <16 x i1> %active.lane.mask
@@ -264,7 +259,6 @@ define <8 x i1> @lane_mask_v8i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, x0, x1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %TC)
   ret <8 x i1> %active.lane.mask
@@ -275,7 +269,6 @@ define <4 x i1> @lane_mask_v4i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.h, x0, x1
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %TC)
   ret <4 x i1> %active.lane.mask
@@ -286,7 +279,6 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.s, x0, x1
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %TC)
   ret <2 x i1> %active.lane.mask
@@ -313,7 +305,6 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
 ; CHECK-STREAMING-NEXT:    mov z1.b, w1
 ; CHECK-STREAMING-NEXT:    cmphi p0.b, p0/z, z1.b, z0.b
 ; CHECK-STREAMING-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-STREAMING-NEXT:    ret
   %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
   ret <16 x i1> %active.lane.mask
@@ -340,7 +331,6 @@ define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
 ; CHECK-STREAMING-NEXT:    mov z1.b, w1
 ; CHECK-STREAMING-NEXT:    cmphi p0.b, p0/z, z1.b, z0.b
 ; CHECK-STREAMING-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-STREAMING-NEXT:    ret
   %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
   ret <8 x i1> %active.lane.mask
@@ -372,7 +362,6 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
 ; CHECK-STREAMING-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-STREAMING-NEXT:    cmphi p0.h, p0/z, z1.h, z0.h
 ; CHECK-STREAMING-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-STREAMING-NEXT:    ret
   %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
   ret <4 x i1> %active.lane.mask
@@ -402,7 +391,6 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
 ; CHECK-STREAMING-NEXT:    umin z0.s, z0.s, #255
 ; CHECK-STREAMING-NEXT:    cmphi p0.s, p0/z, z1.s, z0.s
 ; CHECK-STREAMING-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-STREAMING-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-STREAMING-NEXT:    ret
   %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
   ret <2 x i1> %active.lane.mask
@@ -422,6 +410,7 @@ define <vscale x 4 x i1> @lane_mask_nxv4i1_imm5() {
 ; CHECK-LABEL: lane_mask_nxv4i1_imm5:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, #5 // =0x5
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    ret
 entry:
@@ -443,6 +432,7 @@ define <vscale x 16 x i1> @lane_mask_nxv16i1_imm10() {
 ; CHECK-LABEL: lane_mask_nxv16i1_imm10:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, #10 // =0xa
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelo p0.b, xzr, x8
 ; CHECK-NEXT:    ret
 entry:
@@ -465,7 +455,6 @@ define <8 x i1> @lane_mask_v8i1_imm3() {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.b, vl3
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 0, i64 3)
diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
index e42f2b1cfba48..b735a52e49d63 100644
--- a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
+++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
@@ -1,15 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve2,+fp8 < %s | FileCheck %s
-; RUN: llc -mattr=+sve,+sme2,+fp8 < %s | FileCheck %s
-; RUN: llc -mattr=+sme2,+fp8 --force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sve2,+fp8 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mattr=+sve,+sme2,+fp8 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mattr=+sme2,+fp8 --force-streaming -enable-subreg-liveness=true < %s | FileCheck %s
 
 target triple = "aarch64-linux"
 
 define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2) {
 ; CHECK-LABEL: cvtn_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfcvtn z0.b, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
     %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2)
@@ -19,8 +17,6 @@ define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bf
 define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2) {
 ; CHECK-LABEL: cvtn_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvtn z0.b, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
     %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2)
@@ -30,8 +26,6 @@ define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half>
 define <vscale x 16 x i8> @cvtnb_f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2) {
 ; CHECK-LABEL: cvtnb_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvtnb z0.b, { z0.s, z1.s }
 ; CHECK-NEXT:    ret
     %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2)
diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index e2c861b40e706..01e490b260712 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mattr=+sve    < %s | FileCheck %s -check-prefix CHECK-SVE
-; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1
-; RUN: llc -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SME2
+; RUN: llc -enable-subreg-liveness -mattr=+sve    < %s | FileCheck %s -check-prefix CHECK-SVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1
+; RUN: llc -enable-subreg-liveness -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SME2
 target triple = "aarch64-linux"
 
 ; Test combining of getActiveLaneMask with a pair of extract_vector operations.
@@ -18,6 +18,8 @@ define void @test_2x8bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #0
 ; CHECK-SVE2p1-SME2:       // %bb.0:
 ; CHECK-SVE2p1-SME2-NEXT:    mov w8, w1
 ; CHECK-SVE2p1-SME2-NEXT:    mov w9, w0
+; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x8 killed $w8
+; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.h, p1.h }, x9, x8
 ; CHECK-SVE2p1-SME2-NEXT:    b use
     %r = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 %i, i32 %n)
@@ -179,10 +181,9 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SVE-NEXT:    cset w8, mi
 ; CHECK-SVE-NEXT:    mov z1.s, p0/z, #1 // =0x1
 ; CHECK-SVE-NEXT:    fmov s0, w8
+; CHECK-SVE-NEXT:    // kill: def $q0 killed $d0
 ; CHECK-SVE-NEXT:    mov v0.s[1], v1.s[1]
 ; CHECK-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-SVE-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SVE-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-SVE-NEXT:    b use
 ;
 ; CHECK-SVE2p1-LABEL: test_fixed_extract:
@@ -191,10 +192,9 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SVE2p1-NEXT:    cset w8, mi
 ; CHECK-SVE2p1-NEXT:    mov z1.s, p0/z, #1 // =0x1
 ; CHECK-SVE2p1-NEXT:    fmov s0, w8
+; CHECK-SVE2p1-NEXT:    // kill: def $q0 killed $d0
 ; CHECK-SVE2p1-NEXT:    mov v0.s[1], v1.s[1]
 ; CHECK-SVE2p1-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-SVE2p1-NEXT:    // kill: def $d0 killed $d0 killed $q0
-; CHECK-SVE2p1-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-SVE2p1-NEXT:    b use
 ;
 ; CHECK-SME2-LABEL: test_fixed_extract:
@@ -205,9 +205,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SME2-NEXT:    fmov s2, w8
 ; CHECK-SME2-NEXT:    mov z0.s, z1.s[1]
 ; CHECK-SME2-NEXT:    ext z1.b, z1.b, z1.b, #8
-; CHECK-SME2-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-SME2-NEXT:    zip1 z0.s, z2.s, z0.s
-; CHECK-SME2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-SME2-NEXT:    b use
     %r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
     %v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)
@@ -265,6 +263,8 @@ define void @test_2x16bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #
 ; CHECK-SVE2p1-SME2:       // %bb.0:
 ; CHECK-SVE2p1-SME2-NEXT:    mov w8, w1
 ; CHECK-SVE2p1-SME2-NEXT:    mov w9, w0
+; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x8 killed $w8
+; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.b, p1.b }, x9, x8
 ; CHECK-SVE2p1-SME2-NEXT:    b use
   %r = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i32(i32 %i, i32 %n)
@@ -297,8 +297,11 @@ define void @test_2x32bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #
 ; CHECK-SVE2p1-SME2-NEXT:    mov w9, w1
 ; CHECK-SVE2p1-SME2-NEXT:    mov w10, w0
 ; CHECK-SVE2p1-SME2-NEXT:    adds w8, w0, w8
+; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x9 killed $w9
+; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x10 killed $w10
 ; CHECK-SVE2p1-SME2-NEXT:    csinv w8, w8, wzr, lo
 ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.b, p1.b }, x10, x9
+; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p2.b, p3.b }, x8, x9
 ; CHECK-SVE2p1-SME2-NEXT:    b use
   %r = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i32(i32 %i, i32 %n)
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index ca16df3c09ade..054f34bcff6fb 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,NONSTREAMING
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,STREAMING
 
 ; WITH VSCALE RANGE
 
@@ -102,7 +102,8 @@ define i64 @vscale_4096(<vscale x 16 x i8> %a) #1 {
 ; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    umaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    sub w0, w9, w8
+; CHECK-NEXT:    sub w8, w9, w8
+; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i8(<vscale x 16 x i8> %a, i1 0)
   ret i64 %res
@@ -131,6 +132,7 @@ define i64 @vscale_4096_poison(<vscale x 16 x i8> %a) #1 {
 ; CHECK-NEXT:    umaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    sub w8, w9, w8
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    and x0, x8, #0xffff
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i8(<vscale x 16 x i8> %a, i1 1)
@@ -145,7 +147,6 @@ define i32 @ctz_nxv2i1(<vscale x 2 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 0)
   ret i32 %res
@@ -157,7 +158,6 @@ define i32 @ctz_nxv2i1_poison(<vscale x 2 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 1)
   ret i32 %res
@@ -179,10 +179,8 @@ define i32 @add_i32_ctz_nxv2i1_poison(<vscale x 2 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv2i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.d
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -196,7 +194,6 @@ define i32 @ctz_nxv4i1(<vscale x 4 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 0)
   ret i32 %res
@@ -208,7 +205,6 @@ define i32 @ctz_nxv4i1_poison(<vscale x 4 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 1)
   ret i32 %res
@@ -230,10 +226,8 @@ define i32 @add_i32_ctz_nxv4i1_poison(<vscale x 4 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv4i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.s
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -247,7 +241,6 @@ define i32 @ctz_nxv8i1(<vscale x 8 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 0)
   ret i32 %res
@@ -259,7 +252,6 @@ define i32 @ctz_nxv8i1_poison(<vscale x 8 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 1)
   ret i32 %res
@@ -281,10 +273,8 @@ define i32 @add_i32_ctz_nxv8i1_poison(<vscale x 8 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv8i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.h
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -298,7 +288,6 @@ define i32 @ctz_nxv16i1(<vscale x 16 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0)
   ret i32 %res
@@ -310,7 +299,6 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1)
   ret i32 %res
@@ -323,7 +311,6 @@ define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsca
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne <vscale x 16 x i8> %a, %b
   %select = select <vscale x 16 x i1> %pg, <vscale x 16 x i1> %cmp, <vscale x 16 x i1> zeroinitializer
@@ -348,10 +335,8 @@ define i32 @add_i32_ctz_nxv16i1_poison(<vscale x 16 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv16i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.b
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -370,20 +355,17 @@ define i32 @ctz_v16i1(<16 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v16i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
-; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
   ret i32 %res
@@ -398,20 +380,17 @@ define i32 @ctz_v16i1_poison(<16 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v16i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
-; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
   ret i32 %res
@@ -430,9 +409,8 @@ define i64 @add_i64_ctz_v16i1_poison(<16 x i1> %a, i64 %b) {
 ;
 ; STREAMING-LABEL: add_i64_ctz_v16i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
-; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p0.b, vl16
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
@@ -453,20 +431,17 @@ define i32 @ctz_v8i1(<8 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v8i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.b, vl8
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p0.b, vl8
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 0)
   ret i32 %res
@@ -481,20 +456,17 @@ define i32 @ctz_v8i1_poison(<8 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v8i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.b, vl8
 ; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    ptrue p0.b, vl8
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 1)
   ret i32 %res
@@ -509,20 +481,17 @@ define i32 @ctz_v4i1(<4 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.h
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v4i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.h, vl4
 ; STREAMING-NEXT:    lsl z0.h, z0.h, #15
+; STREAMING-NEXT:    ptrue p0.h, vl4
 ; STREAMING-NEXT:    ptrue p1.h
 ; STREAMING-NEXT:    asr z0.h, z0.h, #15
 ; STREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.h
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 0)
   ret i32 %res
@@ -537,20 +506,17 @@ define i32 @ctz_v4i1_poison(<4 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.h
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v4i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.h, vl4
 ; STREAMING-NEXT:    lsl z0.h, z0.h, #15
+; STREAMING-NEXT:    ptrue p0.h, vl4
 ; STREAMING-NEXT:    ptrue p1.h
 ; STREAMING-NEXT:    asr z0.h, z0.h, #15
 ; STREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.h
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 1)
   ret i32 %res
@@ -565,20 +531,17 @@ define i32 @ctz_v2i1(<2 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.s
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v2i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.s, vl2
 ; STREAMING-NEXT:    lsl z0.s, z0.s, #31
+; STREAMING-NEXT:    ptrue p0.s, vl2
 ; STREAMING-NEXT:    ptrue p1.s
 ; STREAMING-NEXT:    asr z0.s, z0.s, #31
 ; STREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.s
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 0)
   ret i32 %res
@@ -593,20 +556,17 @@ define i32 @ctz_v2i1_poison(<2 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.s
-; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v2i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
-; STREAMING-NEXT:    ptrue p0.s, vl2
 ; STREAMING-NEXT:    lsl z0.s, z0.s, #31
+; STREAMING-NEXT:    ptrue p0.s, vl2
 ; STREAMING-NEXT:    ptrue p1.s
 ; STREAMING-NEXT:    asr z0.s, z0.s, #31
 ; STREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.s
-; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 1)
   ret i32 %res
diff --git a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
index ba13a89fd9c1a..550ec624057ed 100644
--- a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
+++ b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
@@ -28,7 +28,6 @@ define void @dont_coalesce_args(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind
   ; CHECK-REGALLOC-NEXT:   STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
   ; CHECK-REGALLOC-NEXT:   MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr
   ; CHECK-REGALLOC-NEXT:   renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
-  ; CHECK-REGALLOC-NEXT:   renamable $q0 = KILL killed renamable $q0, implicit-def $z0
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-REGALLOC-NEXT:   BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
@@ -59,7 +58,6 @@ define <2 x i64> @dont_coalesce_res() "aarch64_pstate_sm_body" nounwind {
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-REGALLOC-NEXT:   BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
-  ; CHECK-REGALLOC-NEXT:   renamable $q0 = KILL renamable $q0, implicit killed $z0
   ; CHECK-REGALLOC-NEXT:   STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
   ; CHECK-REGALLOC-NEXT:   MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg, implicit-def $fpmr
   ; CHECK-REGALLOC-NEXT:   $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
@@ -95,7 +93,6 @@ define <2 x i64> @dont_coalesce_arg_that_is_also_res(<2 x i64> %a) "aarch64_psta
   ; CHECK-REGALLOC-NEXT:   STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
   ; CHECK-REGALLOC-NEXT:   MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg, implicit-def $fpmr
   ; CHECK-REGALLOC-NEXT:   renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
-  ; CHECK-REGALLOC-NEXT:   renamable $q0 = KILL killed renamable $q0, implicit-def $z0
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-REGALLOC-NEXT:   BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
index 57f8e5438eaf2..f12138b785e85 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
@@ -299,7 +299,6 @@ define void @ldr_with_off_16mulvl(ptr %ptr) {
 define void @ldr_with_off_var(ptr %base, i32 %off) {
 ; CHECK-LABEL: ldr_with_off_var:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    rdsvl x9, #1
 ; CHECK-NEXT:    add w12, w1, #16
@@ -457,8 +456,9 @@ define void @ldr_with_off_many_var_high(i32 %tile_slice, ptr %ptr, i64 %vnum) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    add w8, w2, #32
 ; CHECK-NEXT:    rdsvl x10, #1
-; CHECK-NEXT:    sxtw x9, w8
+; CHECK-NEXT:    mov w9, w8
 ; CHECK-NEXT:    add w12, w0, w8
+; CHECK-NEXT:    sxtw x9, w9
 ; CHECK-NEXT:    madd x9, x10, x9, x1
 ; CHECK-NEXT:    ldr za[w12, 1], [x9, #1, mul vl]
 ; CHECK-NEXT:    ldr za[w12, 2], [x9, #2, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
index 1ff32aade4a1f..0443a097f8e07 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
@@ -299,7 +299,6 @@ define void @str_with_off_16mulvl(ptr %ptr) {
 define void @str_with_off_var(ptr %base, i32 %off) {
 ; CHECK-LABEL: str_with_off_var:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    rdsvl x9, #1
 ; CHECK-NEXT:    add w12, w1, #16
@@ -461,8 +460,9 @@ define void @str_with_off_many_var_high(i32 %tile_slice, ptr %ptr, i64 %vnum) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    add w8, w2, #32
 ; CHECK-NEXT:    rdsvl x10, #1
-; CHECK-NEXT:    sxtw x9, w8
+; CHECK-NEXT:    mov w9, w8
 ; CHECK-NEXT:    add w12, w0, w8
+; CHECK-NEXT:    sxtw x9, w9
 ; CHECK-NEXT:    madd x9, x10, x9, x1
 ; CHECK-NEXT:    str za[w12, 1], [x9, #1, mul vl]
 ; CHECK-NEXT:    str za[w12, 2], [x9, #2, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
index 98735c9518b73..06cabf3631a13 100644
--- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
+++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
@@ -154,7 +154,6 @@ define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Spill
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Reload
@@ -192,7 +191,6 @@ define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Spill
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Reload
@@ -230,7 +228,6 @@ define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Reload
@@ -273,7 +270,6 @@ define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Reload
@@ -312,7 +308,6 @@ define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Reload
@@ -351,7 +346,6 @@ define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Reload
@@ -390,7 +384,6 @@ define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Reload
@@ -429,7 +422,6 @@ define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Spill
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Reload
@@ -465,12 +457,10 @@ define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x30, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
-; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Spill
+; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Reload
 ; CHECK-NEXT:    bl use_v4f32
@@ -508,7 +498,6 @@ define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Spill
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Reload
@@ -551,7 +540,6 @@ define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
@@ -589,7 +577,6 @@ define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
@@ -627,7 +614,6 @@ define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
@@ -665,7 +651,6 @@ define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
@@ -703,7 +688,6 @@ define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
@@ -741,7 +725,6 @@ define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
@@ -779,7 +762,6 @@ define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
@@ -817,7 +799,6 @@ define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Spill
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
@@ -855,12 +836,10 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x30, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.d, z0.d
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Spill
 ; CHECK-NEXT:    and z1.b, z1.b, #0x1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
@@ -1011,10 +990,9 @@ define void @dont_coalesce_res_f16(ptr %ptr) #0 {
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    str z0, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
@@ -1223,10 +1201,9 @@ define void @dont_coalesce_res_v1f16(ptr %ptr) #0 {
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    str z0, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
@@ -1318,10 +1295,9 @@ define void @dont_coalesce_res_v16i8(ptr %ptr) #0 {
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    str z0, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
@@ -1348,10 +1324,9 @@ define void @dont_coalesce_res_v8i16(ptr %ptr) #0 {
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    str z0, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
@@ -1378,10 +1353,9 @@ define void @dont_coalesce_res_v4i32(ptr %ptr) #0 {
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    str z0, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
@@ -1408,10 +1382,9 @@ define void @dont_coalesce_res_v2i64(ptr %ptr) #0 {
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    str z0, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
@@ -1438,10 +1411,9 @@ define void @dont_coalesce_res_v8f16(ptr %ptr) #0 {
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    str z0, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
@@ -1468,10 +1440,9 @@ define void @dont_coalesce_res_v4f32(ptr %ptr) #0 {
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    str z0, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
@@ -1498,10 +1469,9 @@ define void @dont_coalesce_res_v2f64(ptr %ptr) #0 {
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Reload
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    str z0, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
index 6c3975a9b452b..7c611d33636c6 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
@@ -96,10 +96,8 @@ define <2 x i64> @locally_streaming_caller_no_callee(<2 x i64> %a) "aarch64_psta
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    index z0.d, #0, #1
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Reload
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
 ; CHECK-NEXT:    add z0.d, z0.d, #41 // =0x29
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
index dbf47d980d26e..d436d647e6de0 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
@@ -6,9 +6,7 @@ target triple = "aarch64-linux"
 define void @add_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) #0 {
 ; CHECK-LABEL: add_f16_vg1x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fadd za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    fadd za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -21,11 +19,7 @@ define void @add_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @add_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
 ; CHECK-LABEL: add_f16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fadd za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    fadd za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -41,9 +35,7 @@ define void @add_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @sub_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) #1 {
 ; CHECK-LABEL: sub_f16_vg1x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fsub za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    fsub za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -56,11 +48,7 @@ define void @sub_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @sub_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
 ; CHECK-LABEL: sub_f16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fsub za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    fsub za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -76,9 +64,7 @@ define void @sub_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @add_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) #2 {
 ; CHECK-LABEL: add_bf16_vg1x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfadd za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    bfadd za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -91,11 +77,7 @@ define void @add_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
 define void @add_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
 ; CHECK-LABEL: add_bf16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfadd za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    bfadd za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -111,9 +93,7 @@ define void @add_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
 define void @sub_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) #2 {
 ; CHECK-LABEL: sub_bf16_vg1x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfsub za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    bfsub za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -126,11 +106,7 @@ define void @sub_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
 define void @sub_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
 ; CHECK-LABEL: sub_bf16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfsub za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    bfsub za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfmul.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfmul.ll
index 59fef413d59de..24c1f0a20efef 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfmul.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfmul.ll
@@ -4,8 +4,6 @@
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_mul_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_mul_single_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmul { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmul.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -15,10 +13,6 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_mul_single_x2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_mul_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_mul_single_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmul { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmul.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -28,10 +22,6 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_mul_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_mul_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfmul { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmul.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -41,14 +31,6 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_mul_x2_bf16(<
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_mul_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_mul_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfmul { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmul.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfscale.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfscale.ll
index 3254ee4cb6581..5730bd3e65c16 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfscale.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-bfscale.ll
@@ -4,8 +4,6 @@
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_scale_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfscale { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fscale.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x i16> %zm)
@@ -15,10 +13,6 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_scale_single_
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_scale_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfscale { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fscale.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x i16> %zm)
@@ -28,10 +22,6 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_scale_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
 ; CHECK-LABEL: multi_vec_scale_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfscale { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fscale.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -41,14 +31,6 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_scale_x2_bf16
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_scale_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
 ; CHECK-LABEL: multi_vec_scale_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfscale { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fscale.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
index 9b68f25bb0649..f539ac3d0904d 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
@@ -7,8 +7,6 @@
 define <vscale x 8 x half> @multi_vector_cvtn_x2_f16(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
 ; CHECK-LABEL: multi_vector_cvtn_x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvtn z0.h, { z0.s, z1.s }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
@@ -22,8 +20,6 @@ define <vscale x 8 x half> @multi_vector_cvtn_x2_f16(<vscale x 4 x float> %zn1,
 define <vscale x 8 x bfloat> @multi_vector_bfcvtn_x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
 ; CHECK-LABEL: multi_vector_bfcvtn_x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfcvtn z0.h, { z0.s, z1.s }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
index eab0adf70f8c8..ca2943905c058 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
@@ -53,19 +53,19 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x2_
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }@multi_vec_max_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_max_multi_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1,  <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
   ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
@@ -74,19 +74,19 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_max_multi_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1,  <vscale x 4 x float> %zm2,  <vscale x 4 x float> %zm3,  <vscale x 4 x float> %zm4)
   ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
@@ -95,19 +95,19 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_max_multi_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sme.famax.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
@@ -168,19 +168,19 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_main_multi_x2
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_min_multi_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
               @llvm.aarch64.sme.famin.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -191,19 +191,19 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_min_multi_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
               @llvm.aarch64.sme.famin.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -214,19 +214,19 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_min_multi_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    famin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    famin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sme.famin.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fclamp.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fclamp.ll
index b7ada9f1d3faa..6270ef0c766d6 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fclamp.ll
@@ -6,8 +6,6 @@ target triple = "aarch64-linux-gnu"
 define { <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d) {
 ; CHECK-LABEL: test_fclamp_single_x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d)
@@ -17,8 +15,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x2_f16(<
 define { <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d) {
 ; CHECK-LABEL: test_fclamp_single_x2_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fclamp { z0.s, z1.s }, z2.s, z3.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d)
@@ -28,8 +24,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x2_f32
 define { <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d) {
 ; CHECK-LABEL: test_fclamp_single_x2_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fclamp { z0.d, z1.d }, z2.d, z3.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d)
@@ -40,10 +34,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x2_f
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x4_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f) {
 ; CHECK-LABEL: test_fclamp_single_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f)
@@ -53,10 +43,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x4_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f) {
 ; CHECK-LABEL: test_fclamp_single_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fclamp { z0.s - z3.s }, z4.s, z5.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f)
@@ -66,10 +52,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x4_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f) {
 ; CHECK-LABEL: test_fclamp_single_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fclamp { z0.d - z3.d }, z4.d, z5.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
index 006e8aa2475ba..a03af63b86a73 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
@@ -6,9 +6,7 @@
 define void @multi_vector_add_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -25,9 +23,7 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_add_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -44,11 +40,7 @@ define void @multi_vector_add_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_add_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_add_single_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -68,11 +60,7 @@ define void @multi_vector_add_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_add_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_add_single_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -94,9 +82,7 @@ define void @multi_vector_add_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_sub_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -113,9 +99,7 @@ define void @multi_vector_sub_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_sub_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -132,11 +116,7 @@ define void @multi_vector_sub_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_sub_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_sub_single_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -156,11 +136,7 @@ define void @multi_vector_sub_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_sub_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_sub_single_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -182,11 +158,7 @@ define void @multi_vector_sub_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_add_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
 ; CHECK-LABEL: multi_vector_add_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -204,11 +176,7 @@ define void @multi_vector_add_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_add_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
 ; CHECK-LABEL: multi_vector_add_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -244,15 +212,7 @@ define void @multi_vector_add_vg1x2_s_regclass(i32 %slice, <vscale x 4 x float>
 define void @multi_vector_add_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_add_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -270,15 +230,7 @@ define void @multi_vector_add_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_add_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_add_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -320,11 +272,7 @@ define void @multi_vector_add_vg1x4_s_regclass(i32 %slice, <vscale x 4 x float>
 define void @multi_vector_sub_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
 ; CHECK-LABEL: multi_vector_sub_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -342,11 +290,7 @@ define void @multi_vector_sub_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_sub_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
 ; CHECK-LABEL: multi_vector_sub_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -364,15 +308,7 @@ define void @multi_vector_sub_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <v
 define void @multi_vector_sub_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_sub_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -390,15 +326,7 @@ define void @multi_vector_sub_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_sub_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_sub_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -418,9 +346,7 @@ define void @multi_vector_sub_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <v
 define void @multi_vector_add_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    ret
@@ -437,9 +363,7 @@ define void @multi_vector_add_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_add_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    ret
@@ -458,8 +382,8 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, <vscale x 4 x fl
 ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s_regclass:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z5.d, z0.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z4.s, z5.s }, z2.s[3]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 %slice,
@@ -471,11 +395,7 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, <vscale x 4 x fl
 define void @multi_vector_add_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_add_lane_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    ret
@@ -495,11 +415,7 @@ define void @multi_vector_add_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_add_lane_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_add_lane_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    ret
@@ -521,8 +437,8 @@ define void @multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, <vscale x 4 x fl
 ; CHECK-LABEL: multi_vector_add_lane_vg1x4_s_regclass:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    mov z27.d, z0.d
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z24.s - z27.s }, z4.s[3]
@@ -540,9 +456,7 @@ define void @multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, <vscale x 4 x fl
 define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    ret
@@ -559,9 +473,7 @@ define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    ret
@@ -578,11 +490,7 @@ define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn
 define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    ret
@@ -602,11 +510,7 @@ define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_sub_lane_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
index 591fe8da6b79c..7ef319694917e 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
@@ -6,8 +6,6 @@
 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x2_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x2_half:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fscale { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm)
@@ -17,8 +15,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x2_h
 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x2_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x2_float:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fscale { z0.s, z1.s }, { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm)
@@ -28,8 +24,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x2
 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_x2_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x2_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fscale { z0.d, z1.d }, { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm)
@@ -41,10 +35,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x4_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x4_half:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fscale { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm)
@@ -54,10 +44,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x4_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2,  <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x4_float:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fscale { z0.s - z3.s }, { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm)
@@ -67,10 +53,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_x4_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x4_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fscale { z0.d - z3.d }, { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm)
@@ -81,10 +63,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x2_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
 ; CHECK-LABEL: multi_vec_scale_x2_half:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fscale { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -94,10 +72,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x2_half( <v
 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x2_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2 ) {
 ; CHECK-LABEL: multi_vec_scale_x2_float:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fscale { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
@@ -107,10 +81,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x2_float(
 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x2_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
 ; CHECK-LABEL: multi_vec_scale_x2_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fscale { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
@@ -121,14 +91,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x2_doub
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x4_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
 ; CHECK-LABEL: multi_vec_scale_x4_half:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fscale { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
@@ -138,14 +100,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x4_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2,  <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
 ; CHECK-LABEL: multi_vec_scale_x4_float:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fscale { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm1,  <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
@@ -155,14 +109,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x4_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
 ; CHECK-LABEL: multi_vec_scale_x4_double:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fscale { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll
index d48e0cd4d9a92..44582d982011b 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4-lane-x4.ll
@@ -54,10 +54,10 @@ define void @test_multiple_luti4_zt_i8(ptr %ptrA, ptr %ptrB, <vscale x 16 x i8>
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr zt0, [x0]
 ; CHECK-NEXT:    luti4 { z4.s - z7.s }, zt0, z0[1]
-; CHECK-NEXT:    // fake_use: $z4 $z4_z5_z6_z7
+; CHECK-NEXT:    // fake_use: $z4
 ; CHECK-NEXT:    ldr zt0, [x1]
 ; CHECK-NEXT:    luti4 { z0.s - z3.s }, zt0, z0[1]
-; CHECK-NEXT:    // fake_use: $z0 $z0_z1_z2_z3
+; CHECK-NEXT:    // fake_use: $z0
 ; CHECK-NEXT:    ret
   tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr %ptrA)
   %res1 = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sme.luti4.lane.zt.x4.nxv4f32(i32 0, <vscale x 16 x i8> %x, i32 1)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
index c1eff8dd1f8a8..abc7c0ffacb6f 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
@@ -6,8 +6,6 @@ target triple = "aarch64-linux"
 define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>}  @test_luti4_zt_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) #0 {
 ; CHECK-LABEL: test_luti4_zt_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    luti4 { z0.b - z3.b }, zt0, { z0, z1 }
 ; CHECK-NEXT:    ret
   %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1)
@@ -19,13 +17,11 @@ define void @test_multiple_luti4_zt_i8(ptr %ptrA, ptr %ptrB, <vscale x 16 x i8>
 ; CHECK-LABEL: test_multiple_luti4_zt_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr zt0, [x0]
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    luti4 { z4.b - z7.b }, zt0, { z0, z1 }
-; CHECK-NEXT:    // fake_use: $z4 $z4_z5_z6_z7
+; CHECK-NEXT:    // fake_use: $z4
 ; CHECK-NEXT:    ldr zt0, [x1]
 ; CHECK-NEXT:    luti4 { z0.b - z3.b }, zt0, { z0, z1 }
-; CHECK-NEXT:    // fake_use: $z0 $z0_z1_z2_z3
+; CHECK-NEXT:    // fake_use: $z0
 ; CHECK-NEXT:    ret
   tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr %ptrA)
   %res1 = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
index e7c9a0a2f5913..a1aff0c423083 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
@@ -114,8 +114,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x2_u64(<
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_max_single_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmax { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -316,10 +314,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_max_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_max_single_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmax { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -511,10 +505,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x2_u64(<v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_max_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -573,19 +563,19 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x2_
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -597,19 +587,19 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -621,19 +611,19 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -645,19 +635,19 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -671,19 +661,19 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -695,19 +685,19 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -719,19 +709,19 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -743,19 +733,19 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -769,14 +759,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_max_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_max_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmax.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -788,19 +770,19 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@@ -812,19 +794,19 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@@ -836,19 +818,19 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@@ -862,8 +844,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_maxnm_single_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -916,10 +896,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> }  @multi_vec_maxnm_single
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_maxnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_maxnm_single_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -987,10 +963,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_maxnm_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -1049,14 +1021,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x2_f64(
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_maxnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -1068,19 +1032,19 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmaxnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
               @llvm.aarch64.sve.fmaxnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -1091,19 +1055,19 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmaxnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
               @llvm.aarch64.sve.fmaxnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -1114,19 +1078,19 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmaxnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sve.fmaxnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
index 2b470190f94b2..ad42a4d7b54b0 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
@@ -114,8 +114,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x2_u64(<
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_min_single_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmin { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -316,10 +314,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_min_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_min_single_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmin { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -511,10 +505,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_u64(<v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_min_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -573,19 +563,19 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x2_
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -597,19 +587,19 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -621,19 +611,19 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -645,19 +635,19 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -671,19 +661,19 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -695,19 +685,19 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -719,19 +709,19 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -743,19 +733,19 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    umin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -770,14 +760,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_min_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_min_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmin.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -789,19 +771,19 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@@ -813,19 +795,19 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@@ -837,19 +819,19 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fmin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@@ -863,8 +845,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_minnm_single_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -917,10 +897,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> }  @multi_vec_minnm_single
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_minnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_minnm_single_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -988,10 +964,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_minnm_x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -1050,14 +1022,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x2_f64(
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_minnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fminnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -1069,19 +1033,19 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fminnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
               @llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -1092,19 +1056,19 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fminnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
               @llvm.aarch64.sve.fminnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -1115,19 +1079,19 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    fminnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sve.fminnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
index a2a819057e8e8..3feca1eeabbc8 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
@@ -38,9 +38,7 @@ define void @multi_vector_mul_add_single_long_vg4x1_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -53,9 +51,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -70,11 +66,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -87,11 +79,7 @@ define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -143,16 +131,16 @@ define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
-; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -164,16 +152,16 @@ define void @multi_vector_mul_add_multi_long_vg4x4_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
-; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -215,8 +203,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -230,8 +218,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -247,8 +235,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -264,8 +252,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -312,9 +300,7 @@ define void @multi_vector_mul_add_single_long_vg4x1_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -327,9 +313,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -344,11 +328,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -361,11 +341,7 @@ define void @multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -417,16 +393,16 @@ define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
-; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -438,16 +414,16 @@ define void @multi_vector_mul_add_multi_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
-; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -489,8 +465,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -504,8 +480,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -521,8 +497,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -538,8 +514,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -586,9 +562,7 @@ define void @multi_vector_mul_sub_single_long_vg4x1_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -601,9 +575,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -618,11 +590,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -635,11 +603,7 @@ define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -691,16 +655,16 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
-; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -712,16 +676,16 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
-; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -763,8 +727,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -778,8 +742,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -795,8 +759,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -812,8 +776,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -860,9 +824,7 @@ define void @multi_vector_mul_sub_single_long_vg4x1_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -875,9 +837,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -892,11 +852,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -909,11 +865,7 @@ define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -965,16 +917,16 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
-; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -986,16 +938,16 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
-; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -1037,8 +989,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -1052,8 +1004,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -1069,8 +1021,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -1086,8 +1038,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -1108,9 +1060,7 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    sumlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -1125,11 +1075,7 @@ define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, <vscal
 define void @multi_vector_mul_add_single_signed_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    sumlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -1160,8 +1106,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x2_s8(i32 %slice, <vscale
 ; CHECK-LABEL: multi_vector_mul_add_lane_signed_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    sumlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -1177,8 +1123,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x4_s8(i32 %slice, <vscale
 ; CHECK-LABEL: multi_vector_mul_add_lane_signed_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -1212,9 +1158,7 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x1_s8(i32 %slice, <vsc
 define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -1229,11 +1173,7 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, <vsc
 define void @multi_vector_mul_add_single_unsigned_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -1268,16 +1208,16 @@ define void @multi_vector_mul_add_multi_unsigned_long_vg4x4_u8(i32 %slice, <vsca
 ; CHECK-LABEL: multi_vector_mul_add_multi_unsigned_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x1]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
-; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -1306,8 +1246,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x2_s8(i32 %slice, <vscal
 ; CHECK-LABEL: multi_vector_mul_add_lane_unsigned_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -1323,8 +1263,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x4_s8(i32 %slice, <vscal
 ; CHECK-LABEL: multi_vector_mul_add_lane_unsigned_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
index e817dac3d1a6b..20251aff99de1 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
@@ -120,9 +120,7 @@ define void @multi_vector_sub_single_vg2x1_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -135,9 +133,7 @@ define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_add_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -150,9 +146,7 @@ define void @multi_vector_add_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_add_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -165,9 +159,7 @@ define void @multi_vector_add_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -184,9 +176,7 @@ define void @multi_vector_add_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -199,9 +189,7 @@ define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -214,9 +202,7 @@ define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -229,9 +215,7 @@ define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -248,11 +232,7 @@ define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -269,11 +249,8 @@ define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_add_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -290,11 +267,7 @@ define void @multi_vector_add_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_add_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -311,11 +284,7 @@ define void @multi_vector_add_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -336,11 +305,7 @@ define void @multi_vector_add_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -357,11 +322,7 @@ define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -378,11 +339,7 @@ define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -399,11 +356,7 @@ define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -424,11 +377,7 @@ define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm0,  <vscale x 8 x bfloat> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -443,11 +392,7 @@ define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm0, <vscale x 8 x half> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -462,11 +407,7 @@ define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -481,11 +422,7 @@ define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -504,11 +441,7 @@ define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm0, <vscale x 8 x bfloat> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -523,11 +456,7 @@ define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm0, <vscale x 8 x half> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -542,11 +471,7 @@ define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -561,11 +486,7 @@ define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -584,15 +505,7 @@ define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -610,15 +523,7 @@ define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -636,15 +541,7 @@ define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -662,15 +559,7 @@ define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -692,15 +581,7 @@ define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -718,15 +599,7 @@ define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -744,15 +617,7 @@ define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -770,15 +635,7 @@ define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -912,9 +769,7 @@ define void @multi_vector_sub_lane_vg2x1_u16(i32 %slice, <vscale x 8 x i16> %zn,
 define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -929,9 +784,7 @@ define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -946,9 +799,7 @@ define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -963,9 +814,7 @@ define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -984,9 +833,7 @@ define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -1001,9 +848,7 @@ define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -1018,9 +863,7 @@ define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -1035,9 +878,7 @@ define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -1056,11 +897,7 @@ define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1077,11 +914,7 @@ define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1098,11 +931,7 @@ define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1119,11 +948,7 @@ define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1144,11 +969,7 @@ define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1165,11 +986,7 @@ define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1186,11 +1003,7 @@ define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1207,11 +1020,7 @@ define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll
index 5a0cf8e57904b..ca2e130e53f0d 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4-fp8.ll
@@ -28,8 +28,6 @@ define void @mop4a_za16_fp8_2x1(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2
 ; CHECK-LABEL: mop4a_za16_fp8_2x1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.h, { z0.b, z1.b }, z24.b
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x1(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm)
@@ -40,9 +38,7 @@ define void @mop4a_za16_fp8_2x2(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2
 ; CHECK-LABEL: mop4a_za16_fp8_2x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.h, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.fp8.fmop4a.za16.2x2(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
@@ -74,8 +70,6 @@ define void @mop4a_za32_fp8_2x1(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2
 ; CHECK-LABEL: mop4a_za32_fp8_2x1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.s, { z0.b, z1.b }, z24.b
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x1(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm)
@@ -86,9 +80,7 @@ define void @mop4a_za32_fp8_2x2(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2
 ; CHECK-LABEL: mop4a_za32_fp8_2x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.s, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.fp8.fmop4a.za32.2x2(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x1.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x1.ll
index ef1536fae6496..0b6aa5d550a92 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x1.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x1.ll
@@ -8,8 +8,6 @@ define void @mop4a_za32_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vs
 ; CHECK-LABEL: mop4a_za32_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4a za0.s, { z0.b, z1.b }, z24.b
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4a.wide.2x1.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm)
@@ -20,8 +18,6 @@ define void @mop4s_za32_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vs
 ; CHECK-LABEL: mop4s_za32_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4s za0.s, { z0.b, z1.b }, z24.b
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4s.wide.2x1.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm)
@@ -32,8 +28,6 @@ define void @mop4a_za32_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vs
 ; CHECK-LABEL: mop4a_za32_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4a za0.s, { z0.b, z1.b }, z24.b
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4a.wide.2x1.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm)
@@ -44,8 +38,6 @@ define void @mop4s_za32_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vs
 ; CHECK-LABEL: mop4s_za32_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4s za0.s, { z0.b, z1.b }, z24.b
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4s.wide.2x1.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm)
@@ -56,8 +48,6 @@ define void @mop4a_za32_s8_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
 ; CHECK-LABEL: mop4a_za32_s8_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sumop4a za0.s, { z0.b, z1.b }, z24.b
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.sumop4a.wide.2x1.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm)
@@ -68,8 +58,6 @@ define void @mop4s_za32_s8_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
 ; CHECK-LABEL: mop4s_za32_s8_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sumop4s za0.s, { z0.b, z1.b }, z24.b
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.sumop4s.wide.2x1.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm)
@@ -80,8 +68,6 @@ define void @mop4a_za32_u8_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
 ; CHECK-LABEL: mop4a_za32_u8_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    usmop4a za0.s, { z0.b, z1.b }, z24.b
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmop4a.wide.2x1.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm)
@@ -92,8 +78,6 @@ define void @mop4s_za32_u8_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
 ; CHECK-LABEL: mop4s_za32_u8_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    usmop4s za0.s, { z0.b, z1.b }, z24.b
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmop4s.wide.2x1.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm)
@@ -105,8 +89,6 @@ define void @mop4a_za32_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4a_za32_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4a za0.s, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4a.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -117,8 +99,6 @@ define void @mop4s_za32_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4s_za32_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4s za0.s, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4s.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -129,8 +109,6 @@ define void @mop4a_za32_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4a_za32_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4a za0.s, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4a.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -141,8 +119,6 @@ define void @mop4s_za32_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4s_za32_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4s za0.s, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4s.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -153,8 +129,6 @@ define void @mop4a_za32_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2,
 ; CHECK-LABEL: mop4a_za32_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.s, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.wide.2x1.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm)
@@ -165,8 +139,6 @@ define void @mop4s_za32_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2,
 ; CHECK-LABEL: mop4s_za32_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za0.s, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.wide.2x1.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm)
@@ -177,8 +149,6 @@ define void @mop4a_za32_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %
 ; CHECK-LABEL: mop4a_za32_bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmop4a za0.s, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.wide.2x1.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm)
@@ -189,8 +159,6 @@ define void @mop4s_za32_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %
 ; CHECK-LABEL: mop4s_za32_bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmop4s za0.s, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.wide.2x1.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm)
@@ -201,8 +169,6 @@ define void @mop4a_za64_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4a_za64_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4a za0.d, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4a.za64.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -213,8 +179,6 @@ define void @mop4s_za64_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4s_za64_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4s za0.d, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4s.za64.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -225,8 +189,6 @@ define void @mop4a_za64_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4a_za64_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4a za0.d, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4a.za64.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -237,8 +199,6 @@ define void @mop4s_za64_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4s_za64_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4s za0.d, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4s.za64.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -249,8 +209,6 @@ define void @mop4a_za64_s16_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2
 ; CHECK-LABEL: mop4a_za64_s16_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sumop4a za0.d, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.sumop4a.za64.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -261,8 +219,6 @@ define void @mop4s_za64_s16_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2
 ; CHECK-LABEL: mop4s_za64_s16_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sumop4s za0.d, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.sumop4s.za64.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -273,8 +229,6 @@ define void @mop4a_za64_u16_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2
 ; CHECK-LABEL: mop4a_za64_u16_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    usmop4a za0.d, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmop4a.za64.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -285,8 +239,6 @@ define void @mop4s_za64_u16_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2
 ; CHECK-LABEL: mop4s_za64_u16_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    usmop4s za0.d, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmop4s.za64.wide.2x1.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm)
@@ -298,8 +250,6 @@ define void @mop4a_za16_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2,
 ; CHECK-LABEL: mop4a_za16_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.h, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.2x1.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm)
@@ -310,8 +260,6 @@ define void @mop4s_za16_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2,
 ; CHECK-LABEL: mop4s_za16_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za0.h, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x1.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm)
@@ -322,8 +270,6 @@ define void @mop4a_za32_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2
 ; CHECK-LABEL: mop4a_za32_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.s, { z0.s, z1.s }, z24.s
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.2x1.nxv4f32(i32 0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm)
@@ -334,8 +280,6 @@ define void @mop4s_za32_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2
 ; CHECK-LABEL: mop4s_za32_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za0.s, { z0.s, z1.s }, z24.s
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x1.nxv4f32(i32 0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm)
@@ -346,8 +290,6 @@ define void @mop4a_za64_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %z
 ; CHECK-LABEL: mop4a_za64_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.d, { z0.d, z1.d }, z24.d
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.2x1.nxv2f64(i32 0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm)
@@ -358,8 +300,6 @@ define void @mop4s_za64_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %z
 ; CHECK-LABEL: mop4s_za64_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za0.d, { z0.d, z1.d }, z24.d
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x1.nxv2f64(i32 0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm)
@@ -370,8 +310,6 @@ define void @mop4a_za16_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %
 ; CHECK-LABEL: mop4a_za16_bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmop4a za0.h, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.2x1.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm)
@@ -382,8 +320,6 @@ define void @mop4s_za16_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %
 ; CHECK-LABEL: mop4s_za16_bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmop4s za0.h, { z0.h, z1.h }, z24.h
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x1.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x2.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x2.ll
index 4d0d696dc42cf..20cc730953351 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x2.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_2x2.ll
@@ -8,9 +8,7 @@ define void @mop4a_za32_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vs
 ; CHECK-LABEL: mop4a_za32_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4a za0.s, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4a.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
@@ -21,9 +19,7 @@ define void @mop4s_za32_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vs
 ; CHECK-LABEL: mop4s_za32_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4s za0.s, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4s.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
@@ -34,9 +30,7 @@ define void @mop4a_za32_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vs
 ; CHECK-LABEL: mop4a_za32_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4a za0.s, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4a.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
@@ -47,9 +41,7 @@ define void @mop4s_za32_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vs
 ; CHECK-LABEL: mop4s_za32_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4s za0.s, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4s.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
@@ -60,9 +52,7 @@ define void @mop4a_za32_s8_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
 ; CHECK-LABEL: mop4a_za32_s8_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sumop4a za0.s, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.sumop4a.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
@@ -73,9 +63,7 @@ define void @mop4s_za32_s8_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
 ; CHECK-LABEL: mop4s_za32_s8_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sumop4s za0.s, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.sumop4s.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
@@ -86,9 +74,7 @@ define void @mop4a_za32_u8_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
 ; CHECK-LABEL: mop4a_za32_u8_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    usmop4a za0.s, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmop4a.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
@@ -99,9 +85,7 @@ define void @mop4s_za32_u8_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
 ; CHECK-LABEL: mop4s_za32_u8_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    usmop4s za0.s, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmop4s.wide.2x2.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
@@ -113,9 +97,7 @@ define void @mop4a_za32_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4a_za32_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4a za0.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4a.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -126,9 +108,7 @@ define void @mop4s_za32_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4s_za32_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4s za0.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4s.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -139,9 +119,7 @@ define void @mop4a_za32_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4a_za32_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4a za0.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4a.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -152,9 +130,7 @@ define void @mop4s_za32_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4s_za32_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4s za0.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4s.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -165,9 +141,7 @@ define void @mop4a_za32_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2,
 ; CHECK-LABEL: mop4a_za32_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.wide.2x2.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
@@ -178,9 +152,7 @@ define void @mop4s_za32_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2,
 ; CHECK-LABEL: mop4s_za32_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za0.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.wide.2x2.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
@@ -191,9 +163,7 @@ define void @mop4a_za32_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %
 ; CHECK-LABEL: mop4a_za32_bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmop4a za0.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.wide.2x2.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -204,9 +174,7 @@ define void @mop4s_za32_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %
 ; CHECK-LABEL: mop4s_za32_bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmop4s za0.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.wide.2x2.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -217,9 +185,7 @@ define void @mop4a_za64_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4a_za64_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4a za0.d, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4a.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -230,9 +196,7 @@ define void @mop4s_za64_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4s_za64_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4s za0.d, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4s.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -243,9 +207,7 @@ define void @mop4a_za64_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4a_za64_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4a za0.d, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4a.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -256,9 +218,7 @@ define void @mop4s_za64_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <v
 ; CHECK-LABEL: mop4s_za64_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umop4s za0.d, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umop4s.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -269,9 +229,7 @@ define void @mop4a_za64_s16_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2
 ; CHECK-LABEL: mop4a_za64_s16_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sumop4a za0.d, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.sumop4a.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -282,9 +240,7 @@ define void @mop4s_za64_s16_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2
 ; CHECK-LABEL: mop4s_za64_s16_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sumop4s za0.d, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.sumop4s.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -295,9 +251,7 @@ define void @mop4a_za64_u16_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2
 ; CHECK-LABEL: mop4a_za64_u16_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    usmop4a za0.d, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmop4a.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -308,9 +262,7 @@ define void @mop4s_za64_u16_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2
 ; CHECK-LABEL: mop4s_za64_u16_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    usmop4s za0.d, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmop4s.za64.wide.2x2.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -322,9 +274,7 @@ define void @mop4a_za16_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2,
 ; CHECK-LABEL: mop4a_za16_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.h, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.2x2.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
@@ -335,9 +285,7 @@ define void @mop4s_za16_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2,
 ; CHECK-LABEL: mop4s_za16_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za0.h, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x2.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
@@ -348,9 +296,7 @@ define void @mop4a_za32_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2
 ; CHECK-LABEL: mop4a_za32_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.s, { z0.s, z1.s }, { z24.s, z25.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.2x2.nxv4f32(i32 0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
@@ -361,9 +307,7 @@ define void @mop4s_za32_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2
 ; CHECK-LABEL: mop4s_za32_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za0.s, { z0.s, z1.s }, { z24.s, z25.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x2.nxv4f32(i32 0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
@@ -374,9 +318,7 @@ define void @mop4a_za64_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %z
 ; CHECK-LABEL: mop4a_za64_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4a za0.d, { z0.d, z1.d }, { z24.d, z25.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.2x2.nxv2f64(i32 0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
@@ -387,9 +329,7 @@ define void @mop4s_za64_f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %z
 ; CHECK-LABEL: mop4s_za64_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za0.d, { z0.d, z1.d }, { z24.d, z25.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x2.nxv2f64(i32 0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
@@ -400,9 +340,7 @@ define void @mop4a_za16_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %
 ; CHECK-LABEL: mop4a_za16_bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmop4a za0.h, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4a.2x2.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -413,9 +351,7 @@ define void @mop4s_za16_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %
 ; CHECK-LABEL: mop4s_za16_bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmop4s za0.h, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x2.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -428,9 +364,7 @@ define void @mop4s_za32_s8_limit(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn
 ; CHECK-LABEL: mop4s_za32_s8_limit:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4s za3.s, { z0.b, z1.b }, { z24.b, z25.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4s.wide.2x2.nxv16i8(i32 3, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
@@ -441,9 +375,7 @@ define void @mop4s_za32_s16_limit(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %z
 ; CHECK-LABEL: mop4s_za32_s16_limit:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4s za3.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4s.wide.2x2.nxv8i16(i32 3, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -454,9 +386,7 @@ define void @mop4s_za32_f16_limit(<vscale x 8 x half> %zn1, <vscale x 8 x half>
 ; CHECK-LABEL: mop4s_za32_f16_limit:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za3.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.wide.2x2.nxv8f16(i32 3, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
@@ -467,9 +397,7 @@ define void @mop4s_za32_bf16_limit(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfl
 ; CHECK-LABEL: mop4s_za32_bf16_limit:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmop4s za3.s, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.wide.2x2.nxv8bf16(i32 3, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -480,9 +408,7 @@ define void @mop4s_za64_s16_limit(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %z
 ; CHECK-LABEL: mop4s_za64_s16_limit:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smop4s za7.d, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smop4s.za64.wide.2x2.nxv8i16(i32 7, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -493,9 +419,7 @@ define void @mop4s_za64_f64_limit(<vscale x 2 x double> %zn1, <vscale x 2 x doub
 ; CHECK-LABEL: mop4s_za64_f64_limit:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za7.d, { z0.d, z1.d }, { z24.d, z25.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x2.nxv2f64(i32 7, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
@@ -506,9 +430,7 @@ define void @mop4s_za32_f32_limit(<vscale x 4 x float> %zn1, <vscale x 4 x float
 ; CHECK-LABEL: mop4s_za32_f32_limit:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za3.s, { z0.s, z1.s }, { z24.s, z25.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x2.nxv4f32(i32 3, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
@@ -519,9 +441,7 @@ define void @mop4s_za16_f16_limit(<vscale x 8 x half> %zn1, <vscale x 8 x half>
 ; CHECK-LABEL: mop4s_za16_f16_limit:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmop4s za1.h, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x2.nxv8f16(i32 1, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
@@ -532,9 +452,7 @@ define void @mop4s_za16_bf16_limit(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfl
 ; CHECK-LABEL: mop4s_za16_bf16_limit:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmop4s za1.h, { z0.h, z1.h }, { z24.h, z25.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.mop4s.2x2.nxv8bf16(i32 1, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
index 8cc0d0314f64a..e001785d504a2 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
@@ -324,19 +324,19 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x2_s64
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    srshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    srshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
               @llvm.aarch64.sve.srshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
@@ -347,19 +347,19 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    srshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    srshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
               @llvm.aarch64.sve.srshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
@@ -370,19 +370,19 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    srshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    srshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
               @llvm.aarch64.sve.srshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
@@ -393,19 +393,19 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    srshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    srshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
               @llvm.aarch64.sve.srshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
@@ -480,19 +480,19 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_uhl_x2_u64
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    urshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    urshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
               @llvm.aarch64.sve.urshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
@@ -503,19 +503,19 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    urshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    urshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
               @llvm.aarch64.sve.urshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
@@ -526,19 +526,19 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    urshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    urshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
               @llvm.aarch64.sve.urshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
@@ -549,19 +549,19 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    urshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    urshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
               @llvm.aarch64.sve.urshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sclamp.ll
index b9b8469d692bd..c93755dca28d8 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sclamp.ll
@@ -6,8 +6,6 @@ target triple = "aarch64-linux-gnu"
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
 ; CHECK-LABEL: test_sclamp_single_x2_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.b, z1.b }, z2.b, z3.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
@@ -17,8 +15,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x2_i8(<vsc
 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) {
 ; CHECK-LABEL: test_sclamp_single_x2_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x  8 x i16>, <vscale x  8 x i16> } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
@@ -28,8 +24,6 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x2_i16(<vs
 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) {
 ; CHECK-LABEL: test_sclamp_single_x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.s, z1.s }, z2.s, z3.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
@@ -39,8 +33,6 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x2_i32(<vs
 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) {
 ; CHECK-LABEL: test_sclamp_single_x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.d, z1.d }, z2.d, z3.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
@@ -50,10 +42,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x2_i64(<vs
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) {
 ; CHECK-LABEL: test_sclamp_single_x4_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.b - z3.b }, z4.b, z5.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
@@ -63,10 +51,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) {
 ; CHECK-LABEL: test_sclamp_single_x4_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
@@ -76,10 +60,6 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) {
 ; CHECK-LABEL: test_sclamp_single_x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.s - z3.s }, z4.s, z5.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
@@ -89,10 +69,6 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) {
 ; CHECK-LABEL: test_sclamp_single_x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.d - z3.d }, z4.d, z5.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
index 5e94f11390a82..f0a140d3d52c3 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
@@ -8,7 +8,6 @@ define <vscale x 2 x i64> @test_tileslice_no_add(i32 %idx) #0 {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1
 ; CHECK-NEXT:    ret
 entry:
   %read = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %idx)
@@ -21,7 +20,6 @@ define <vscale x 2 x i64> @test_tileslice_add_nonconstant(i32 %idx1, i32 %idx2)
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    add w8, w0, w1
 ; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1
 ; CHECK-NEXT:    ret
 entry:
   %add = add i32 %idx1, %idx2
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
index ab8c136fc41c2..e2869788d6cc4 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
@@ -196,19 +196,19 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_mul
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                                        <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -220,19 +220,19 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                                         <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -244,19 +244,19 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                                         <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -268,19 +268,19 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z30.d, z7.d
-; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z29.d, z6.d
-; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z28.d, z5.d
-; CHECK-NEXT:    mov z25.d, z2.d
-; CHECK-NEXT:    ldr z31, [x0]
-; CHECK-NEXT:    mov z24.d, z1.d
-; CHECK-NEXT:    sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT:    mov z0.d, z24.d
-; CHECK-NEXT:    mov z1.d, z25.d
-; CHECK-NEXT:    mov z2.d, z26.d
-; CHECK-NEXT:    mov z3.d, z27.d
+; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    ldr z27, [x0]
+; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    sqdmulh { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z0.d, z4.d
+; CHECK-NEXT:    mov z1.d, z5.d
+; CHECK-NEXT:    mov z2.d, z6.d
+; CHECK-NEXT:    mov z3.d, z7.d
 ; CHECK-NEXT:    ret
                                         <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
index c2865990faab6..7387f889d8b26 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
@@ -8,9 +8,7 @@
 define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,  <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -27,9 +25,7 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4
 define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,  <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -50,11 +46,7 @@ define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2
 define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -75,11 +67,7 @@ define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4
 define void @multi_vector_sub_write_single_za_vg1x4_i64(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -105,11 +93,7 @@ define void @multi_vector_sub_write_single_za_vg1x4_i64(i32 %slice,
 define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -128,11 +112,7 @@ define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32>
 define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -155,15 +135,7 @@ define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64>
 define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -187,15 +159,7 @@ define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32>
 define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -225,9 +189,7 @@ define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64>
 define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -240,9 +202,7 @@ define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -255,9 +215,7 @@ define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fsub za.s[w8, 0, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    fsub za.s[w8, 7, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -272,9 +230,7 @@ define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fsub za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    fsub za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -291,11 +247,7 @@ define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn
 define void @multi_vector_sub_za_vg1x4_i32(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
@@ -314,11 +266,7 @@ define void @multi_vector_sub_za_vg1x4_i32(i32 %slice,
 define void @multi_vector_sub_za_vg1x4_i64(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -337,11 +285,7 @@ define void @multi_vector_sub_za_vg1x4_i64(i32 %slice,
 define void @multi_vector_sub_za_vg1x4_f32(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fsub za.s[w8, 0, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    fsub za.s[w8, 7, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
@@ -360,11 +304,7 @@ define void @multi_vector_sub_za_vg1x4_f32(i32 %slice,
 define void @multi_vector_sub_za_vg1x4_f64(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fsub za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    fsub za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll
index efd5f951eced3..e918137bee27d 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-tmop.ll
@@ -7,8 +7,6 @@ define void @stmopa_za32_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <v
 ; CHECK-LABEL: stmopa_za32_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    stmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.stmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -19,8 +17,6 @@ define void @utmopa_za32_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <v
 ; CHECK-LABEL: utmopa_za32_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    utmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.utmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -31,8 +27,6 @@ define void @ustmopa_za32_u8_s8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2
 ; CHECK-LABEL: ustmopa_za32_u8_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ustmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.ustmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -43,8 +37,6 @@ define void @sutmopa_za32_s8_u8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2
 ; CHECK-LABEL: sutmopa_za32_s8_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sutmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.sutmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -55,8 +47,6 @@ define void @stmopa_za32_s16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <
 ; CHECK-LABEL: stmopa_za32_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    stmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.stmopa.za32.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -67,8 +57,6 @@ define void @utmopa_za32_u16(<vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <
 ; CHECK-LABEL: utmopa_za32_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    utmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.utmopa.za32.nxv8i16(i32 0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -79,8 +67,6 @@ define void @ftmopa_za32_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2,
 ; CHECK-LABEL: ftmopa_za32_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.ftmopa.za32.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -91,8 +77,6 @@ define void @bftmopa_za32_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat>
 ; CHECK-LABEL: bftmopa_za32_bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bftmopa za0.s, { z0.h, z1.h }, z2.h, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.ftmopa.za32.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -103,8 +87,6 @@ define void @ftmopa_za32_f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn
 ; CHECK-LABEL: ftmopa_za32_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ftmopa za0.s, { z0.s, z1.s }, z2.s, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.ftmopa.za32.nxv4f32(i32 0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -115,8 +97,6 @@ define void @ftmopa_za16_f16(<vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2,
 ; CHECK-LABEL: ftmopa_za16_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ftmopa za0.h, { z0.h, z1.h }, z2.h, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.ftmopa.za16.nxv8f16(i32 0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -127,8 +107,6 @@ define void @bftmopa_za16_bf16(<vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat>
 ; CHECK-LABEL: bftmopa_za16_bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bftmopa za0.h, { z0.h, z1.h }, z2.h, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.ftmopa.za16.nxv8bf16(i32 0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -139,8 +117,6 @@ define void @ftmopa_za16_f8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <v
 ; CHECK-LABEL: ftmopa_za16_f8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ftmopa za0.h, { z0.b, z1.b }, z2.b, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.ftmopa.za16.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
@@ -151,8 +127,6 @@ define void @ftmopa_za32_f8(<vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <v
 ; CHECK-LABEL: ftmopa_za32_f8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z28.d, z3.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ftmopa za0.s, { z0.b, z1.b }, z2.b, z28[0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.ftmopa.za32.nxv16i8(i32 0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zm, <vscale x 16 x i8> %zk, i32 0)
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-uclamp.ll
index be0391b67e476..dd9415a396d35 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-uclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-uclamp.ll
@@ -6,8 +6,6 @@ target triple = "aarch64-linux-gnu"
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
 ; CHECK-LABEL: test_uclamp_single_x2_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.b, z1.b }, z2.b, z3.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
@@ -17,8 +15,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x2_i8(<vsc
 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) {
 ; CHECK-LABEL: test_uclamp_single_x2_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x  8 x i16>, <vscale x  8 x i16> } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
@@ -28,8 +24,6 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x2_i16(<vs
 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) {
 ; CHECK-LABEL: test_uclamp_single_x2_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.s, z1.s }, z2.s, z3.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
@@ -39,8 +33,6 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x2_i32(<vs
 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) {
 ; CHECK-LABEL: test_uclamp_single_x2_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.d, z1.d }, z2.d, z3.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
@@ -50,10 +42,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x2_i64(<vs
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) {
 ; CHECK-LABEL: test_uclamp_single_x4_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.b - z3.b }, z4.b, z5.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
@@ -63,10 +51,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) {
 ; CHECK-LABEL: test_uclamp_single_x4_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
@@ -76,10 +60,6 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) {
 ; CHECK-LABEL: test_uclamp_single_x4_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.s - z3.s }, z4.s, z5.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
@@ -89,10 +69,6 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) {
 ; CHECK-LABEL: test_uclamp_single_x4_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.d - z3.d }, z4.d, z5.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)
diff --git a/llvm/test/CodeGen/AArch64/sme2p2-intrinsics-fmul.ll b/llvm/test/CodeGen/AArch64/sme2p2-intrinsics-fmul.ll
index cdd032cd2a52c..d4e594e35722f 100644
--- a/llvm/test/CodeGen/AArch64/sme2p2-intrinsics-fmul.ll
+++ b/llvm/test/CodeGen/AArch64/sme2p2-intrinsics-fmul.ll
@@ -4,8 +4,6 @@
 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_mul_single_x2_f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vec_mul_single_x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmul { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmul.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm)
@@ -15,10 +13,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_mul_single_x2_f16
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>  } @multi_vec_mul_single_x4_f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vec_mul_single_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmul { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>  } @llvm.aarch64.sve.fmul.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm)
@@ -28,10 +22,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_mul_x2_f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2) {
 ; CHECK-LABEL: multi_vec_mul_x2_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fmul { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fmul.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2)
@@ -41,14 +31,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_mul_x2_f16(<vscal
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>  } @multi_vec_mul_x4_f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_mul_x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fmul { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>  } @llvm.aarch64.sve.fmul.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
@@ -58,8 +40,6 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_mul_single_x2_f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vec_mul_single_x2_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmul { z0.s, z1.s }, { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmul.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm)
@@ -69,10 +49,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_mul_single_x2_f
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>  } @multi_vec_mul_single_x4_f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vec_mul_single_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmul { z0.s - z3.s }, { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>  } @llvm.aarch64.sve.fmul.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm)
@@ -82,10 +58,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_mul_x2_f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2) {
 ; CHECK-LABEL: multi_vec_mul_x2_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fmul { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fmul.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2)
@@ -95,14 +67,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_mul_x2_f32(<vsc
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>  } @multi_vec_mul_x4_f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_mul_x4_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fmul { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>  } @llvm.aarch64.sve.fmul.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4)
@@ -112,8 +76,6 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_mul_single_x2_f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vec_mul_single_x2_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmul { z0.d, z1.d }, { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmul.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm)
@@ -123,10 +85,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_mul_single_x2
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>  } @multi_vec_mul_single_x4_f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vec_mul_single_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmul { z0.d - z3.d }, { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>  } @llvm.aarch64.sve.fmul.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm)
@@ -136,10 +94,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_mul_x2_f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2) {
 ; CHECK-LABEL: multi_vec_mul_x2_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fmul { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fmul.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2)
@@ -149,14 +103,6 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_mul_x2_f64(<v
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>  } @multi_vec_mul_x4_f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_mul_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fmul { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>  } @llvm.aarch64.sve.fmul.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4)
diff --git a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
index 895271d8bfdc8..20343f2da18c8 100644
--- a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
+++ b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
@@ -94,6 +94,7 @@ define void @se_memset(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind {
 ; CHECK-MOPS-NEXT:    adrp x8, :got:dst
 ; CHECK-MOPS-NEXT:    mov w9, #2 // =0x2
 ; CHECK-MOPS-NEXT:    ldr x8, [x8, :got_lo12:dst]
+; CHECK-MOPS-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-MOPS-NEXT:    setp [x8]!, x0!, x9
 ; CHECK-MOPS-NEXT:    setm [x8]!, x0!, x9
 ; CHECK-MOPS-NEXT:    sete [x8]!, x0!, x9
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-reductions.ll b/llvm/test/CodeGen/AArch64/sve-bf16-reductions.ll
index 7f79c9c5431ea..649188431f4d8 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-reductions.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mattr=+sve,+bf16            < %s | FileCheck %s -check-prefixes=CHECK,SVE
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s -check-prefixes=CHECK,SME
+; RUN: llc -enable-subreg-liveness -mattr=+sve,+bf16            < %s | FileCheck %s -check-prefixes=CHECK,SVE
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s -check-prefixes=CHECK,SME
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -224,8 +224,8 @@ define float @promoted_fadd(<vscale x 4 x bfloat> %a) {
 ; SVE-NEXT:    ptrue p0.s
 ; SVE-NEXT:    faddv s0, p0, z0.s
 ; SVE-NEXT:    bfcvt h0, s0
+; SVE-NEXT:    // kill: def $d0 killed $h0
 ; SVE-NEXT:    shll v0.4s, v0.4h, #16
-; SVE-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; SVE-NEXT:    ret
 ;
 ; SME-LABEL: promoted_fadd:
@@ -234,6 +234,7 @@ define float @promoted_fadd(<vscale x 4 x bfloat> %a) {
 ; SME-NEXT:    ptrue p0.s
 ; SME-NEXT:    faddv s0, p0, z0.s
 ; SME-NEXT:    bfcvt h0, s0
+; SME-NEXT:    // kill: def $s0 killed $h0
 ; SME-NEXT:    fmov w8, s0
 ; SME-NEXT:    lsl w8, w8, #16
 ; SME-NEXT:    fmov s0, w8
@@ -251,7 +252,6 @@ define float @promoted_fmax(<vscale x 4 x bfloat> %a) {
 ; CHECK-NEXT:    lsl z0.s, z0.s, #16
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %rdx = call bfloat @llvm.vector.reduce.fmax.nxv4bf16(<vscale x 4 x bfloat> %a)
   %res = fpext bfloat %rdx to float
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
index e71d983664cd9..ae7aa9b35f62a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
@@ -27,7 +27,6 @@ define <8 x i16> @two_way_i8_i16_vl128(ptr %accptr, ptr %uptr, ptr %sptr) {
 ; SME-NEXT:    ldr q2, [x2]
 ; SME-NEXT:    umlalb z0.h, z2.b, z1.b
 ; SME-NEXT:    umlalt z0.h, z2.b, z1.b
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SME-NEXT:    ret
   %acc = load <8 x i16>, ptr %accptr
   %u = load <16 x i8>, ptr %uptr
@@ -61,8 +60,6 @@ define <16 x i16> @two_way_i8_i16_vl128_double_width(ptr %accptr, ptr %uptr, ptr
 ; SME-NEXT:    umlalb z1.h, z4.b, z2.b
 ; SME-NEXT:    umlalt z0.h, z5.b, z3.b
 ; SME-NEXT:    umlalt z1.h, z4.b, z2.b
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <16 x i16>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
@@ -115,8 +112,6 @@ define <16 x i16> @two_way_i8_i16_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-NEXT:    umlalt z0.h, z2.b, z1.b
 ; SME-NEXT:    movprfx z1, z0
 ; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <16 x i16>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
@@ -150,7 +145,6 @@ define <4 x i32> @two_way_i16_i32_vl128(ptr %accptr, ptr %uptr, ptr %sptr) {
 ; SME-NEXT:    ldr q2, [x2]
 ; SME-NEXT:    umlalb z0.s, z2.h, z1.h
 ; SME-NEXT:    umlalt z0.s, z2.h, z1.h
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SME-NEXT:    ret
   %acc = load <4 x i32>, ptr %accptr
   %u = load <8 x i16>, ptr %uptr
@@ -184,8 +178,6 @@ define <8 x i32> @two_way_i16_i32_vl128_double_width(ptr %accptr, ptr %uptr, ptr
 ; SME-NEXT:    umlalb z1.s, z4.h, z2.h
 ; SME-NEXT:    umlalt z0.s, z5.h, z3.h
 ; SME-NEXT:    umlalt z1.s, z4.h, z2.h
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <8 x i32>, ptr %accptr
   %u = load <16 x i16>, ptr %uptr
@@ -238,8 +230,6 @@ define <8 x i32> @two_way_i16_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-NEXT:    umlalt z0.s, z2.h, z1.h
 ; SME-NEXT:    movprfx z1, z0
 ; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <8 x i32>, ptr %accptr
   %u = load <16 x i16>, ptr %uptr
@@ -273,7 +263,6 @@ define <2 x i64> @two_way_i32_i64_vl128(ptr %accptr, ptr %uptr, ptr %sptr) {
 ; SME-NEXT:    ldr q2, [x2]
 ; SME-NEXT:    umlalb z0.d, z2.s, z1.s
 ; SME-NEXT:    umlalt z0.d, z2.s, z1.s
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SME-NEXT:    ret
   %acc = load <2 x i64>, ptr %accptr
   %u = load <4 x i32>, ptr %uptr
@@ -307,8 +296,6 @@ define <4 x i64> @two_way_i32_i64_vl128_double_width(ptr %accptr, ptr %uptr, ptr
 ; SME-NEXT:    umlalb z1.d, z4.s, z2.s
 ; SME-NEXT:    umlalt z0.d, z5.s, z3.s
 ; SME-NEXT:    umlalt z1.d, z4.s, z2.s
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <4 x i64>, ptr %accptr
   %u = load <8 x i32>, ptr %uptr
@@ -361,8 +348,6 @@ define <4 x i64> @two_way_i32_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-NEXT:    umlalt z0.d, z2.s, z1.s
 ; SME-NEXT:    movprfx z1, z0
 ; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <4 x i64>, ptr %accptr
   %u = load <8 x i32>, ptr %uptr
@@ -395,7 +380,6 @@ define <4 x i32> @four_way_i8_i32_vl128(ptr %accptr, ptr %uptr, ptr %sptr) {
 ; SME-NEXT:    ldr q1, [x1]
 ; SME-NEXT:    ldr q2, [x2]
 ; SME-NEXT:    udot z0.s, z2.b, z1.b
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SME-NEXT:    ret
   %acc = load <4 x i32>, ptr %accptr
   %u = load <16 x i8>, ptr %uptr
@@ -422,7 +406,6 @@ define <4 x i32> @four_way_i8_i32_vl128_usdot(ptr %accptr, ptr %uptr, ptr %sptr)
 ; SME-NEXT:    ldr q1, [x1]
 ; SME-NEXT:    ldr q2, [x2]
 ; SME-NEXT:    usdot z0.s, z1.b, z2.b
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SME-NEXT:    ret
   %acc = load <4 x i32>, ptr %accptr
   %u = load <16 x i8>, ptr %uptr
@@ -449,7 +432,6 @@ define <4 x i32> @four_way_i8_i32_vl128_sudot(ptr %accptr, ptr %uptr, ptr %sptr)
 ; SME-NEXT:    ldr q1, [x1]
 ; SME-NEXT:    ldr q2, [x2]
 ; SME-NEXT:    usdot z0.s, z2.b, z1.b
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SME-NEXT:    ret
   %acc = load <4 x i32>, ptr %accptr
   %u = load <16 x i8>, ptr %uptr
@@ -496,7 +478,6 @@ define <2 x i64> @four_way_i8_i64_vl128_usdot(ptr %accptr, ptr %uptr, ptr %sptr)
 ; SME-NEXT:    ldr q1, [x0]
 ; SME-NEXT:    saddwb z1.d, z1.d, z0.s
 ; SME-NEXT:    saddwt z0.d, z1.d, z0.s
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SME-NEXT:    ret
   %acc = load <2 x i64>, ptr %accptr
   %u = load <16 x i8>, ptr %uptr
@@ -543,7 +524,6 @@ define <2 x i64> @four_way_i16_i64_vl128_usdot(ptr %accptr, ptr %uptr, ptr %sptr
 ; SME-NEXT:    ld1h { z1.d }, p0/z, [x1, x8, lsl #1]
 ; SME-NEXT:    ld1sh { z2.d }, p0/z, [x2, x8, lsl #1]
 ; SME-NEXT:    mla z0.d, p0/m, z2.d, z1.d
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SME-NEXT:    ret
   %acc = load <2 x i64>, ptr %accptr
   %u = load <8 x i16>, ptr %uptr
@@ -573,8 +553,6 @@ define <8 x i32> @four_way_i8_i32_vl128_double_width(ptr %accptr, ptr %uptr, ptr
 ; SME-NEXT:    ldp q5, q4, [x2]
 ; SME-NEXT:    udot z0.s, z5.b, z3.b
 ; SME-NEXT:    udot z1.s, z4.b, z2.b
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <8 x i32>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
@@ -604,8 +582,6 @@ define <8 x i32> @four_way_i8_i32_vl128_double_width_usdot(ptr %accptr, ptr %upt
 ; SME-NEXT:    ldp q5, q4, [x2]
 ; SME-NEXT:    usdot z0.s, z3.b, z5.b
 ; SME-NEXT:    usdot z1.s, z2.b, z4.b
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <8 x i32>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
@@ -649,8 +625,6 @@ define <8 x i32> @four_way_i8_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-NEXT:    udot z0.s, z2.b, z1.b
 ; SME-NEXT:    movprfx z1, z0
 ; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <8 x i32>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
@@ -694,8 +668,6 @@ define <8 x i32> @four_way_i8_i32_vl256_usdot(ptr %accptr, ptr %uptr, ptr %sptr)
 ; SME-NEXT:    usdot z0.s, z1.b, z2.b
 ; SME-NEXT:    movprfx z1, z0
 ; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <8 x i32>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
@@ -732,7 +704,6 @@ define <2 x i64> @four_way_i16_i64_vl128(ptr %accptr, ptr %uptr, ptr %sptr) {
 ; SME-NEXT:    ldr q1, [x1]
 ; SME-NEXT:    ldr q2, [x2]
 ; SME-NEXT:    udot z0.d, z2.h, z1.h
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SME-NEXT:    ret
   %acc = load <2 x i64>, ptr %accptr
   %u = load <8 x i16>, ptr %uptr
@@ -772,8 +743,6 @@ define <4 x i64> @four_way_i16_i64_vl128_double_width(ptr %accptr, ptr %uptr, pt
 ; SME-NEXT:    ldp q5, q4, [x2]
 ; SME-NEXT:    udot z0.d, z5.h, z3.h
 ; SME-NEXT:    udot z1.d, z4.h, z2.h
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <4 x i64>, ptr %accptr
   %u = load <16 x i16>, ptr %uptr
@@ -827,8 +796,6 @@ define <4 x i64> @four_way_i16_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vsca
 ; SME-NEXT:    udot z0.d, z2.h, z1.h
 ; SME-NEXT:    movprfx z1, z0
 ; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <4 x i64>, ptr %accptr
   %u = load <16 x i16>, ptr %uptr
@@ -881,7 +848,6 @@ define <2 x i64> @eight_way_i8_i64_vl128(ptr %accptr, ptr %uptr, ptr %sptr) {
 ; SME-NEXT:    ldr q1, [x0]
 ; SME-NEXT:    uaddwb z1.d, z1.d, z0.s
 ; SME-NEXT:    uaddwt z0.d, z1.d, z0.s
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SME-NEXT:    ret
   %acc = load <2 x i64>, ptr %accptr
   %u = load <16 x i8>, ptr %uptr
@@ -944,8 +910,6 @@ define <4 x i64> @four_way_i8_i64_vl128_double_width(ptr %accptr, ptr %uptr, ptr
 ; SME-NEXT:    uaddwb z2.d, z2.d, z1.s
 ; SME-NEXT:    uaddwt z0.d, z3.d, z0.s
 ; SME-NEXT:    uaddwt z1.d, z2.d, z1.s
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <4 x i64>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
@@ -1001,8 +965,6 @@ define <4 x i64> @four_way_i8_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-NEXT:    uaddwt z0.d, z0.d, z2.s
 ; SME-NEXT:    movprfx z1, z0
 ; SME-NEXT:    ext z1.b, z1.b, z0.b, #16
-; SME-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; SME-NEXT:    ret
   %acc = load <4 x i64>, ptr %accptr
   %u = load <32 x i8>, ptr %uptr
diff --git a/llvm/test/CodeGen/AArch64/sve-fmsub.ll b/llvm/test/CodeGen/AArch64/sve-fmsub.ll
index d2e129e5db796..7cc552388980a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fmsub.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fmsub.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mattr=+sve %s -o - | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mattr=+sme -force-streaming %s -o - | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mattr=+sve -enable-subreg-liveness %s -o - | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mattr=+sme -force-streaming -enable-subreg-liveness  %s -o - | FileCheck %s --check-prefixes=CHECK
 
 target triple = "aarch64"
 
@@ -47,11 +47,7 @@ define <2 x double> @fma_negC_v2f64(<2 x double> %a, <2 x double> %b, <2 x doubl
 ; CHECK-LABEL: fma_negC_v2f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmsb z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <2 x double> %c
@@ -63,11 +59,7 @@ define <4 x float> @fma_negC_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %
 ; CHECK-LABEL: fma_negC_v4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmsb z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <4 x float> %c
@@ -79,11 +71,7 @@ define <8 x half> @fma_negC_v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
 ; CHECK-LABEL: fma_negC_v8f16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmsb z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <8 x half> %c
@@ -95,11 +83,7 @@ define <4 x float> @fma_negC_commutative_v4f32(<4 x float> %c, <4 x float> %a, <
 ; CHECK-LABEL: fma_negC_commutative_v4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmls z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <4 x float> %c
@@ -152,11 +136,7 @@ define <2 x double> @fma_negA_negC_v2f64(<2 x double> %a, <2 x double> %b, <2 x
 ; CHECK-LABEL: fma_negA_negC_v2f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmad z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <2 x double> %a
@@ -169,11 +149,7 @@ define <4 x float> @fma_negA_negC_v4f32(<4 x float> %a, <4 x float> %b, <4 x flo
 ; CHECK-LABEL: fma_negA_negC_v4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmad z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <4 x float> %a
@@ -186,11 +162,7 @@ define <8 x half> @fma_negA_negC_v8f16(<8 x half> %a, <8 x half> %b, <8 x half>
 ; CHECK-LABEL: fma_negA_negC_v8f16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <8 x half> %a
@@ -203,11 +175,7 @@ define <4 x float> @fma_negA_negC_commutative_v4f32(<4 x float> %c, <4 x float>
 ; CHECK-LABEL: fma_negA_negC_commutative_v4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmla z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <4 x float> %a
@@ -259,11 +227,7 @@ define <2 x double> @fma_negB_negC_v2f64(<2 x double> %a, <2 x double> %b, <2 x
 ; CHECK-LABEL: fma_negB_negC_v2f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmad z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <2 x double> %b
@@ -276,11 +240,7 @@ define <4 x float> @fma_negB_negC_v4f32(<4 x float> %a, <4 x float> %b, <4 x flo
 ; CHECK-LABEL: fma_negB_negC_v4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmad z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <4 x float> %b
@@ -293,11 +253,7 @@ define <8 x half> @fma_negB_negC_v8f16(<8 x half> %a, <8 x half> %b, <8 x half>
 ; CHECK-LABEL: fma_negB_negC_v8f16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <8 x half> %b
@@ -310,11 +266,7 @@ define <4 x float> @fma_negB_negC_commutative_v4f32(<4 x float> %c, <4 x float>
 ; CHECK-LABEL: fma_negB_negC_commutative_v4f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fnmla z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %neg = fneg <4 x float> %b
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
index da63a6748e612..27a380772dd51 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
 
 ;
 ; Testing prfop encodings
@@ -123,7 +123,7 @@ define void @test_svprf_vnum_under(<vscale x 16 x i1> %pg, ptr %base) {
 ; CHECK-LABEL: test_svprf_vnum_under:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    mov x9, #-528
+; CHECK-NEXT:    mov x9, #-528 // =0xfffffffffffffdf0
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
 ; CHECK-NEXT:    prfb pstl3strm, p0, [x0, x8]
@@ -149,8 +149,9 @@ define void @test_svprf_vnum_over(<vscale x 16 x i1> %pg, ptr %base) {
 ; CHECK-LABEL: test_svprf_vnum_over:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    mov w9, #512
+; CHECK-NEXT:    mov w9, #512 // =0x200
 ; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-NEXT:    mul x8, x8, x9
 ; CHECK-NEXT:    prfb pstl3strm, p0, [x0, x8]
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
index bd43050cfae9b..3bca8170398c3 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll
@@ -394,7 +394,7 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK-LABEL: ld4.nxv64i8_outside_lower_bound:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    mov x9, #-576
+; CHECK-NEXT:    mov x9, #-576 // =0xfffffffffffffdc0
 ; CHECK-NEXT:    lsr x8, x8, #4
 ; CHECK-NEXT:    mul x8, x8, x9
 ; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x8]
@@ -410,14 +410,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 }
 
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @ld4.nxv64i8_outside_upper_bound(<vscale x 16 x i1> %Pg, ptr %addr) {
-; CHECK-LABEL: ld4.nxv64i8_outside_upper_bound:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    rdvl x8, #1
-; CHECK-NEXT:    mov w9, #512
-; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    ld4b { z0.b - z3.b }, p0/z, [x0, x8]
-; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #16) #2)
 ; xM = 2^9
 ; xP = RDVL * 2^-4
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
index 5127fa7e93480..9d9a147e1af7e 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
 
 ;
 ; Converting to svbool_t (<vscale x 16 x i1>)
@@ -153,7 +153,6 @@ define <vscale x 16 x i1> @chained_reinterpret() {
 define <vscale x 16 x i1> @reinterpret_scalar_bool_h(i1 %x){
 ; CHECK-LABEL: reinterpret_scalar_bool_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    ret
@@ -166,7 +165,6 @@ define <vscale x 16 x i1> @reinterpret_scalar_bool_h(i1 %x){
 define <vscale x 16 x i1> @reinterpret_scalar_bool_s(i1 %x){
 ; CHECK-LABEL: reinterpret_scalar_bool_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    ret
@@ -179,7 +177,6 @@ define <vscale x 16 x i1> @reinterpret_scalar_bool_s(i1 %x){
 define <vscale x 16 x i1> @reinterpret_scalar_bool_q(i1 %x){
 ; CHECK-LABEL: reinterpret_scalar_bool_q:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
index f4ab2025f7182..213475c742da9 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
 
 ; Since SQDEC{B|H|W|D|P} and SQINC{B|H|W|D|P} have identical semantics, the tests for
 ;   * @llvm.aarch64.sve.sqinc{b|h|w|d|p}, and
@@ -92,9 +92,7 @@ define <vscale x 2 x i64> @sqdecp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %
 define i32 @sqdecb_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecb_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecb x0, w0, vl3, mul #4
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecb.n32(i32 %a, i32 3, i32 4)
   ret i32 %out
@@ -103,7 +101,6 @@ define i32 @sqdecb_n32_i32(i32 %a) {
 define i64 @sqdecb_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecb_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecb x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecb.n32(i32 %a, i32 3, i32 4)
@@ -128,9 +125,7 @@ define i64 @sqdecb_n64(i64 %a) {
 define i32 @sqdech_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdech_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdech x0, w0, vl5, mul #6
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdech.n32(i32 %a, i32 5, i32 6)
   ret i32 %out
@@ -139,7 +134,6 @@ define i32 @sqdech_n32_i32(i32 %a) {
 define i64 @sqdech_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdech_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdech x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdech.n32(i32 %a, i32 3, i32 4)
@@ -164,9 +158,7 @@ define i64 @sqdech_n64(i64 %a) {
 define i32 @sqdecw_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecw_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecw x0, w0, vl7, mul #8
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecw.n32(i32 %a, i32 7, i32 8)
   ret i32 %out
@@ -175,7 +167,6 @@ define i32 @sqdecw_n32_i32(i32 %a) {
 define i64 @sqdecw_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecw_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecw x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecw.n32(i32 %a, i32 3, i32 4)
@@ -200,9 +191,7 @@ define i64 @sqdecw_n64(i64 %a) {
 define i32 @sqdecd_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecd_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecd x0, w0, vl16, mul #10
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecd.n32(i32 %a, i32 9, i32 10)
   ret i32 %out
@@ -211,7 +200,6 @@ define i32 @sqdecd_n32_i32(i32 %a) {
 define i64 @sqdecd_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecd_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecd x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecd.n32(i32 %a, i32 3, i32 4)
@@ -236,9 +224,7 @@ define i64 @sqdecd_n64(i64 %a) {
 define i32 @sqdecp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b8_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.b, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   ret i32 %out
@@ -247,7 +233,6 @@ define i32 @sqdecp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 define i64 @sqdecp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b8_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.b, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
@@ -259,9 +244,7 @@ define i64 @sqdecp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 define i32 @sqdecp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b16_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.h, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   ret i32 %out
@@ -270,7 +253,6 @@ define i32 @sqdecp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 define i64 @sqdecp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b16_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.h, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
@@ -282,9 +264,7 @@ define i64 @sqdecp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 define i32 @sqdecp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.s, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   ret i32 %out
@@ -293,7 +273,6 @@ define i32 @sqdecp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 define i64 @sqdecp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.s, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
@@ -305,9 +284,7 @@ define i64 @sqdecp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 define i32 @sqdecp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b64_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.d, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   ret i32 %out
@@ -316,7 +293,6 @@ define i32 @sqdecp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 define i64 @sqdecp_n32_b64_i64(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b64_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.d, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
index aa3403b714cbc..394378f0f83d5 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
 
 ; Since SQDEC{B|H|W|D|P} and SQINC{B|H|W|D|P} have identical semantics, the tests for
 ;   * @llvm.aarch64.sve.sqinc{b|h|w|d|p}, and
@@ -92,9 +92,7 @@ define <vscale x 2 x i64> @sqincp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %
 define i32 @sqincb_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincb_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincb x0, w0, vl3, mul #4
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincb.n32(i32 %a, i32 3, i32 4)
   ret i32 %out
@@ -103,7 +101,6 @@ define i32 @sqincb_n32_i32(i32 %a) {
 define i64 @sqincb_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincb_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincb x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincb.n32(i32 %a, i32 3, i32 4)
@@ -128,9 +125,7 @@ define i64 @sqincb_n64(i64 %a) {
 define i32 @sqinch_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqinch_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqinch x0, w0, vl5, mul #6
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqinch.n32(i32 %a, i32 5, i32 6)
   ret i32 %out
@@ -139,7 +134,6 @@ define i32 @sqinch_n32_i32(i32 %a) {
 define i64 @sqinch_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqinch_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqinch x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqinch.n32(i32 %a, i32 3, i32 4)
@@ -164,9 +158,7 @@ define i64 @sqinch_n64(i64 %a) {
 define i32 @sqincw_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincw_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincw x0, w0, vl7, mul #8
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincw.n32(i32 %a, i32 7, i32 8)
   ret i32 %out
@@ -175,7 +167,6 @@ define i32 @sqincw_n32_i32(i32 %a) {
 define i64 @sqincw_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincw_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincw x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincw.n32(i32 %a, i32 3, i32 4)
@@ -200,9 +191,7 @@ define i64 @sqincw_n64(i64 %a) {
 define i32 @sqincd_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincd_n32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincd x0, w0, vl16, mul #10
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincd.n32(i32 %a, i32 9, i32 10)
   ret i32 %out
@@ -211,7 +200,6 @@ define i32 @sqincd_n32_i32(i32 %a) {
 define i64 @sqincd_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincd_n32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincd x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincd.n32(i32 %a, i32 3, i32 4)
@@ -236,9 +224,7 @@ define i64 @sqincd_n64(i64 %a) {
 define i32 @sqincp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b8_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.b, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   ret i32 %out
@@ -247,7 +233,6 @@ define i32 @sqincp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 define i64 @sqincp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b8_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.b, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
@@ -259,9 +244,7 @@ define i64 @sqincp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 define i32 @sqincp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b16_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.h, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   ret i32 %out
@@ -270,7 +253,6 @@ define i32 @sqincp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 define i64 @sqincp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b16_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.h, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
@@ -282,9 +264,7 @@ define i64 @sqincp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 define i32 @sqincp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b32_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.s, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   ret i32 %out
@@ -293,7 +273,6 @@ define i32 @sqincp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 define i64 @sqincp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b32_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.s, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
@@ -305,9 +284,7 @@ define i64 @sqincp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 define i32 @sqincp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b64_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.d, w0
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   ret i32 %out
@@ -316,7 +293,6 @@ define i32 @sqincp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 define i64 @sqincp_n32_b64_i64(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b64_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.d, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
index 579358ecb887e..b951e0f395893 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
 
 ; NOTE: invalid, upper and lower bound immediate values of the reg+imm
 ; addressing mode are checked only for the byte version of each
@@ -15,8 +15,6 @@
 define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 2, i64 0
@@ -30,9 +28,7 @@ define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    rdvl x8, #3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 3, i64 0
@@ -46,9 +42,7 @@ define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vsca
 define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    rdvl x8, #-18
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -18, i64 0
@@ -62,9 +56,7 @@ define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    rdvl x8, #16
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 16, i64 0
@@ -78,8 +70,6 @@ define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm_lower_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16, i64 0
@@ -93,8 +83,6 @@ define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm_upper_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14, i64 0
@@ -112,8 +100,6 @@ define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 2, i64 0
@@ -127,8 +113,6 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 2, i64 0
@@ -146,8 +130,6 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, #4, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 4, i64 0
@@ -161,8 +143,6 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, #6, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 6, i64 0
@@ -180,8 +160,6 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, #8, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 8, i64 0
@@ -195,8 +173,6 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, #10, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 10, i64 0
@@ -214,9 +190,6 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #3, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 3, i64 0
@@ -231,10 +204,7 @@ define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 4, i64 0
@@ -249,10 +219,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <v
 define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #5
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 5, i64 0
@@ -267,10 +234,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <v
 define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #-27
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -27, i64 0
@@ -285,10 +249,7 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #24
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 24, i64 0
@@ -303,9 +264,6 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm_lower_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24, i64 0
@@ -320,9 +278,6 @@ define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm_upper_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21, i64 0
@@ -341,9 +296,6 @@ define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, #6, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 6, i64 0
@@ -358,9 +310,6 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, #9, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 9, i64 0
@@ -379,9 +328,6 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, #12, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 12, i64 0
@@ -396,9 +342,6 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, #15, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 15, i64 0
@@ -417,9 +360,6 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, #18, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 18, i64 0
@@ -434,9 +374,6 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 -3, i64 0
@@ -455,10 +392,6 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #4, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 4, i64 0
@@ -474,11 +407,7 @@ define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    rdvl x8, #5
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 5, i64 0
@@ -494,11 +423,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <v
 define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    rdvl x8, #6
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 6, i64 0
@@ -514,11 +439,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <v
 define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    rdvl x8, #7
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 7, i64 0
@@ -536,12 +457,8 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    mov x9, #-576 // =0xfffffffffffffdc0
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
@@ -563,12 +480,9 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    mov w9, #512 // =0x200
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1)
@@ -588,10 +502,6 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm_lower_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32, i64 0
@@ -607,10 +517,6 @@ define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm_upper_bound:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28, i64 0
@@ -630,10 +536,6 @@ define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, #8, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 8, i64 0
@@ -649,10 +551,6 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, #12, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 12, i64 0
@@ -672,10 +570,6 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, #16, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 16, i64 0
@@ -691,10 +585,6 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, #20, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 20, i64 0
@@ -714,10 +604,6 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, #24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 24, i64 0
@@ -733,10 +619,6 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28, i64 0
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
index c8fc8d7a70cc6..c05d8306d3568 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
 
 ;
 ; ST2B
@@ -9,8 +9,6 @@
 define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, ptr %addr, i64 %offset
@@ -28,8 +26,6 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, ptr %addr, i64 %offset
@@ -43,8 +39,6 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, ptr %addr, i64 %offset
@@ -62,8 +56,6 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, ptr %addr, i64 %offset
@@ -77,8 +69,6 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, ptr %addr, i64 %offset
@@ -96,8 +86,6 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, ptr %addr, i64 %offset
@@ -111,8 +99,6 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, ptr %addr, i64 %offset
@@ -130,9 +116,6 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, ptr %addr, i64 %offset
@@ -151,9 +134,6 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, ptr %addr, i64 %offset
@@ -168,9 +148,6 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, ptr %addr, i64 %offset
@@ -189,9 +166,6 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, ptr %addr, i64 %offset
@@ -206,9 +180,6 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, ptr %addr, i64 %offset
@@ -227,9 +198,6 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, ptr %addr, i64 %offset
@@ -244,9 +212,6 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, ptr %addr, i64 %offset
@@ -265,10 +230,6 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, ptr %addr, i64 %offset
@@ -288,10 +249,6 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, ptr %addr, i64 %offset
@@ -307,10 +264,6 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, ptr %addr, i64 %offset
@@ -330,10 +283,6 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, ptr %addr, i64 %offset
@@ -349,10 +298,6 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, ptr %addr, i64 %offset
@@ -372,10 +317,6 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, ptr %addr, i64 %offset
@@ -391,10 +332,6 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, ptr %addr, i64 %offset
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index b09baa6bf7e0a..d4647b1ba46e5 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
 
 ;
 ; ST2B
@@ -9,8 +9,6 @@
 define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
@@ -27,8 +25,6 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
@@ -41,8 +37,6 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
@@ -55,8 +49,6 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: st2h_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -73,8 +65,6 @@ define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
@@ -87,8 +77,6 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
@@ -105,8 +93,6 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
@@ -119,8 +105,6 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
@@ -133,8 +117,6 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st2d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_ptr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2p0(<vscale x 2 x ptr> %v0,
@@ -151,9 +133,6 @@ define void @st2d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x
 define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -171,9 +150,6 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
@@ -187,9 +163,6 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
@@ -203,9 +176,6 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: st3h_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -223,9 +193,6 @@ define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
@@ -239,9 +206,6 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
@@ -259,9 +223,6 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
@@ -275,9 +236,6 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
@@ -291,9 +249,6 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st3d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x ptr> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_ptr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2p0(<vscale x 2 x ptr> %v0,
@@ -311,10 +266,6 @@ define void @st3d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x
 define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -333,10 +284,6 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
@@ -351,10 +298,6 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
@@ -369,10 +312,6 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: st4h_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -391,10 +330,6 @@ define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
@@ -409,10 +344,6 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
@@ -431,10 +362,6 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
@@ -449,10 +376,6 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
@@ -467,10 +390,6 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st4d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x ptr> %v2, <vscale x 2 x ptr> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_ptr:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2p0(<vscale x 2 x ptr> %v0,
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
index 4005e7d99400d..c60d69b9fb5a4 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
 
 ;
 ; WHILELE
@@ -82,6 +82,7 @@ define <vscale x 2 x i1> @whilele_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
 ; CHECK-LABEL: whilele_d_ii_dont_fold_to_ptrue_larger_than_minvec:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #3 // =0x3
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilele p0.d, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 0, i64 3)
@@ -101,6 +102,7 @@ define <vscale x 16 x i1> @whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 ; CHECK-LABEL: whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #9 // =0x9
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilele p0.b, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 9)
@@ -217,6 +219,7 @@ define <vscale x 2 x i1> @whilelo_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
 ; CHECK-LABEL: whilelo_d_ii_dont_fold_to_ptrue_larger_than_minvec:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #3 // =0x3
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 0, i64 3)
@@ -236,6 +239,7 @@ define <vscale x 16 x i1> @whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 ; CHECK-LABEL: whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #9 // =0x9
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelo p0.b, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 9)
@@ -342,6 +346,7 @@ define <vscale x 2 x i1> @whilels_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
 ; CHECK-LABEL: whilels_d_ii_dont_fold_to_ptrue_larger_than_minvec:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #3 // =0x3
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilels p0.d, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 0, i64 3)
@@ -361,6 +366,7 @@ define <vscale x 16 x i1> @whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 ; CHECK-LABEL: whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #9 // =0x9
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilels p0.b, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 9)
@@ -476,6 +482,7 @@ define <vscale x 2 x i1> @whilelt_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
 ; CHECK-LABEL: whilelt_d_ii_dont_fold_to_ptrue_larger_than_minvec:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #3 // =0x3
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelt p0.d, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 3)
@@ -495,6 +502,7 @@ define <vscale x 16 x i1> @whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 ; CHECK-LABEL: whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #9 // =0x9
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelt p0.b, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 9)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
index 4ad5b38b256fe..36b8220bb3338 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -mattr=+sme2p2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme2p2 -force-streaming-compatible < %s | FileCheck %s --check-prefix=USE-NEON-NO-GPRS
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -10,10 +10,8 @@ define double @t1(double %x) {
 ; CHECK-LABEL: t1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t1:
@@ -37,10 +35,8 @@ define float @t2(float %x) {
 ; CHECK-LABEL: t2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t2:
@@ -64,10 +60,8 @@ define half @t3(half %x)  {
 ; CHECK-LABEL: t3:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t3:
@@ -93,10 +87,8 @@ define double @t4(double %x) {
 ; CHECK-LABEL: t4:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t4:
@@ -120,10 +112,8 @@ define float @t5(float %x) {
 ; CHECK-LABEL: t5:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t5:
@@ -147,10 +137,8 @@ define half @t6(half %x)  {
 ; CHECK-LABEL: t6:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; USE-NEON-NO-GPRS-LABEL: t6:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
index 3ae0089d409d0..cfdc1baf8c282 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-to-int.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -9,7 +9,6 @@ define i32 @f16_to_s32(half %x) {
 ; CHECK-LABEL: f16_to_s32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -28,7 +27,6 @@ define i64 @f16_to_s64(half %x) {
 ; CHECK-LABEL: f16_to_s64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -47,7 +45,6 @@ define i32 @f32_to_s32(float %x) {
 ; CHECK-LABEL: f32_to_s32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -65,7 +62,6 @@ define i64 @f32_to_s64(float %x) {
 ; CHECK-LABEL: f32_to_s64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -98,7 +94,6 @@ define i64 @f64_to_s64(double %x) {
 ; CHECK-LABEL: f64_to_s64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -116,7 +111,6 @@ define i32 @f16_to_u32(half %x) {
 ; CHECK-LABEL: f16_to_u32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -135,7 +129,6 @@ define i64 @f16_to_u64(half %x) {
 ; CHECK-LABEL: f16_to_u64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -154,7 +147,6 @@ define i32 @f32_to_u32(float %x) {
 ; CHECK-LABEL: f32_to_u32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -172,7 +164,6 @@ define i64 @f32_to_u64(float %x) {
 ; CHECK-LABEL: f32_to_u64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -205,7 +196,6 @@ define i64 @f64_to_u64(double %x) {
 ; CHECK-LABEL: f64_to_u64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
index d4221dab4fcff..83403fc4ad86f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-int-to-fp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -11,7 +11,6 @@ define half @s32_to_f16(i32 %x) {
 ; CHECK-NEXT:    fmov s0, w0
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: s32_to_f16:
@@ -30,7 +29,6 @@ define float @s32_to_f32(i32 %x) {
 ; CHECK-NEXT:    fmov s0, w0
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: s32_to_f32:
@@ -63,7 +61,6 @@ define half @u32_to_f16(i32 %x) {
 ; CHECK-NEXT:    fmov s0, w0
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: u32_to_f16:
@@ -82,7 +79,6 @@ define float @u32_to_f32(i32 %x) {
 ; CHECK-NEXT:    fmov s0, w0
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: u32_to_f32:
@@ -115,7 +111,6 @@ define half @s64_to_f16(i64 %x) {
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: s64_to_f16:
@@ -134,7 +129,6 @@ define float @s64_to_f32(i64 %x) {
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: s64_to_f32:
@@ -152,7 +146,6 @@ define double @s64_to_f64(i64 %x) {
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: s64_to_f64:
@@ -170,7 +163,6 @@ define half @u64_to_f16(i64 %x) {
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: u64_to_f16:
@@ -189,7 +181,6 @@ define float @u64_to_f32(i64 %x) {
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: u64_to_f32:
@@ -207,7 +198,6 @@ define double @u64_to_f64(i64 %x) {
 ; CHECK-NEXT:    fmov d0, x0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: u64_to_f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
index 478072d33d8c9..43d8a7139c026 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -10,10 +10,8 @@ define <4 x i8> @vls_sve_and_4xi8(<4 x i8> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi8:
@@ -36,10 +34,8 @@ define <8 x i8> @vls_sve_and_8xi8(<8 x i8> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_8xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI1_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_8xi8:
@@ -68,10 +64,8 @@ define <16 x i8> @vls_sve_and_16xi8(<16 x i8> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_16xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI2_0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_16xi8:
@@ -112,13 +106,9 @@ define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind {
 ; CHECK-LABEL: vls_sve_and_32xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI3_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_32xi8:
@@ -184,11 +174,9 @@ define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind {
 define <2 x i16> @vls_sve_and_2xi16(<2 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_2xi16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmov s1, wzr
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_2xi16:
@@ -207,10 +195,8 @@ define <4 x i16> @vls_sve_and_4xi16(<4 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI5_0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI5_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi16:
@@ -233,10 +219,8 @@ define <8 x i16> @vls_sve_and_8xi16(<8 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_8xi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI6_0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI6_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_8xi16:
@@ -265,13 +249,9 @@ define <16 x i16> @vls_sve_and_16xi16(<16 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_16xi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI7_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_16xi16:
@@ -313,9 +293,7 @@ define <2 x i32> @vls_sve_and_2xi32(<2 x i32> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_2xi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z1.s, #0, #-1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_2xi32:
@@ -334,10 +312,8 @@ define <4 x i32> @vls_sve_and_4xi32(<4 x i32> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI9_0
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI9_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi32:
@@ -358,13 +334,9 @@ define <8 x i32> @vls_sve_and_8xi32(<8 x i32> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_8xi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI10_0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_8xi32:
@@ -390,9 +362,7 @@ define <2 x i64> @vls_sve_and_2xi64(<2 x i64> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_2xi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z1.d, #0, #-1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_2xi64:
@@ -411,12 +381,8 @@ define <4 x i64> @vls_sve_and_4xi64(<4 x i64> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z2.d, #0, #-1
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index bd49db8a4c414..39f8aa104f484 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -12,12 +12,10 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i8> @ctlz_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: ctlz_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
 ; CHECK-NEXT:    sub z0.h, z0.h, #8 // =0x8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v4i8:
@@ -51,9 +49,7 @@ define <8 x i8> @ctlz_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: ctlz_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v8i8:
@@ -103,9 +99,7 @@ define <16 x i8> @ctlz_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: ctlz_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v16i8:
@@ -339,12 +333,10 @@ define void @ctlz_v32i8(ptr %a) {
 define <2 x i16> @ctlz_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: ctlz_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
 ; CHECK-NEXT:    sub z0.s, z0.s, #16 // =0x10
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v2i16:
@@ -369,9 +361,7 @@ define <4 x i16> @ctlz_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: ctlz_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v4i16:
@@ -405,9 +395,7 @@ define <8 x i16> @ctlz_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: ctlz_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v8i16:
@@ -546,9 +534,7 @@ define <2 x i32> @ctlz_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: ctlz_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v2i32:
@@ -571,9 +557,7 @@ define <4 x i32> @ctlz_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: ctlz_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v4i32:
@@ -646,9 +630,7 @@ define <1 x i64> @ctlz_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: ctlz_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v1i64:
@@ -669,9 +651,7 @@ define <2 x i64> @ctlz_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: ctlz_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v2i64:
@@ -732,11 +712,9 @@ define void @ctlz_v4i64(ptr %a) {
 define <4 x i8> @ctpop_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: ctpop_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v4i8:
@@ -807,9 +785,7 @@ define <8 x i8> @ctpop_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: ctpop_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v8i8:
@@ -932,9 +908,7 @@ define <16 x i8> @ctpop_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: ctpop_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v16i8:
@@ -1602,11 +1576,9 @@ define void @ctpop_v32i8(ptr %a) {
 define <2 x i16> @ctpop_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: ctpop_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v2i16:
@@ -1650,9 +1622,7 @@ define <4 x i16> @ctpop_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: ctpop_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v4i16:
@@ -1723,9 +1693,7 @@ define <8 x i16> @ctpop_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: ctpop_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v8i16:
@@ -2082,9 +2050,7 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: ctpop_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v2i32:
@@ -2128,9 +2094,7 @@ define <4 x i32> @ctpop_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: ctpop_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v4i32:
@@ -2325,9 +2289,7 @@ define <1 x i64> @ctpop_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: ctpop_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v1i64:
@@ -2359,9 +2321,7 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: ctpop_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v2i64:
@@ -2484,12 +2444,10 @@ define void @ctpop_v4i64(ptr %a) {
 define <4 x i8> @cttz_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: cttz_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    orr z0.h, z0.h, #0x100
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v4i8:
@@ -2527,10 +2485,8 @@ define <8 x i8> @cttz_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: cttz_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v8i8:
@@ -2588,10 +2544,8 @@ define <16 x i8> @cttz_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: cttz_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v16i8:
@@ -2875,12 +2829,10 @@ define void @cttz_v32i8(ptr %a) {
 define <2 x i16> @cttz_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: cttz_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    orr z0.s, z0.s, #0x10000
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v2i16:
@@ -2907,10 +2859,8 @@ define <4 x i16> @cttz_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: cttz_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v4i16:
@@ -2948,10 +2898,8 @@ define <8 x i16> @cttz_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: cttz_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v8i16:
@@ -3116,10 +3064,8 @@ define <2 x i32> @cttz_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: cttz_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v2i32:
@@ -3144,10 +3090,8 @@ define <4 x i32> @cttz_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: cttz_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v4i32:
@@ -3234,10 +3178,8 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: cttz_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v1i64:
@@ -3259,10 +3201,8 @@ define <2 x i64> @cttz_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: cttz_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index 71396da004002..03d8ed868e6a4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64"
 
@@ -29,8 +29,6 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r
 ; CHECK-NEXT:    and z0.d, z1.d, z6.d
 ; CHECK-NEXT:    orr z1.d, z4.d, z2.d
 ; CHECK-NEXT:    orr z0.d, z0.d, z3.d
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
index d226fc89c3381..07072168ff089 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -249,8 +249,6 @@ define void @build_vector_non_const_v4i1(i1 %a, i1 %b, i1 %c, i1 %d, ptr %out) {
 define void @build_vector_non_const_v2f64(double %a, double %b, ptr %out) {
 ; CHECK-LABEL: build_vector_non_const_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
@@ -272,8 +270,6 @@ define void @build_vector_non_const_v2f64(double %a, double %b, ptr %out) {
 define void @build_vector_non_const_v2f32(float %a, float %b, ptr %out) {
 ; CHECK-LABEL: build_vector_non_const_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $z1
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -296,10 +292,6 @@ define void @build_vector_non_const_v2f32(float %a, float %b, ptr %out) {
 define void @build_vector_non_const_v4f32(float %a, float %b, float %c, float %d, ptr %out)  {
 ; CHECK-LABEL: build_vector_non_const_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $s2 killed $s2 def $z2
-; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
-; CHECK-NEXT:    // kill: def $s3 killed $s3 def $z3
-; CHECK-NEXT:    // kill: def $s1 killed $s1 def $z1
 ; CHECK-NEXT:    zip1 z2.s, z2.s, z3.s
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z2.d
@@ -327,10 +319,6 @@ define void @build_vector_non_const_v4f32(float %a, float %b, float %c, float %d
 define void @build_vector_non_const_v4f64(double %a, double %b, double %c, double %d, ptr %out)  {
 ; CHECK-LABEL: build_vector_non_const_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d3 killed $d3 def $z3
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z2.d, z2.d, z3.d
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
 ; CHECK-NEXT:    stp q0, q2, [x0]
@@ -356,14 +344,6 @@ define void @build_vector_non_const_v4f64(double %a, double %b, double %c, doubl
 define void @build_vector_non_const_v8f16(half %a, half %b, half %c, half %d, half %e, half %f, half %g, half %h, ptr %out) {
 ; CHECK-LABEL: build_vector_non_const_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $h6 killed $h6 def $z6
-; CHECK-NEXT:    // kill: def $h4 killed $h4 def $z4
-; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
-; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
-; CHECK-NEXT:    // kill: def $h7 killed $h7 def $z7
-; CHECK-NEXT:    // kill: def $h5 killed $h5 def $z5
-; CHECK-NEXT:    // kill: def $h3 killed $h3 def $z3
-; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
 ; CHECK-NEXT:    zip1 z6.h, z6.h, z7.h
 ; CHECK-NEXT:    zip1 z4.h, z4.h, z5.h
 ; CHECK-NEXT:    zip1 z2.h, z2.h, z3.h
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
index 6ec2b837eed2a..c2607b62d2d50 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -12,8 +12,6 @@ target triple = "aarch64-unknown-linux-gnu"
 define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2)  {
 ; CHECK-LABEL: concat_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.h, z1.h[3]
 ; CHECK-NEXT:    mov z3.h, z1.h[2]
 ; CHECK-NEXT:    mov z4.h, z1.h[1]
@@ -27,7 +25,6 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2)  {
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v8i8:
@@ -61,11 +58,8 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2)  {
 define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2)  {
 ; CHECK-LABEL: concat_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v16i8:
@@ -140,14 +134,11 @@ define void @concat_v64i8(ptr %a, ptr %b, ptr %c) {
 define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2)  {
 ; CHECK-LABEL: concat_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.s, z1.s[1]
 ; CHECK-NEXT:    mov z3.s, z0.s[1]
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4i16:
@@ -172,11 +163,8 @@ define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2)  {
 define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2)  {
 ; CHECK-LABEL: concat_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v8i16:
@@ -245,10 +233,7 @@ define void @concat_v32i16(ptr %a, ptr %b, ptr %c) {
 define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2)  {
 ; CHECK-LABEL: concat_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2i32:
@@ -270,11 +255,8 @@ define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2)  {
 define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2)  {
 ; CHECK-LABEL: concat_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4i32:
@@ -340,11 +322,8 @@ define void @concat_v16i32(ptr %a, ptr %b, ptr %c) {
 define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2)  {
 ; CHECK-LABEL: concat_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2i64:
@@ -408,10 +387,7 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
 define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2)  {
 ; CHECK-LABEL: concat_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4f16:
@@ -432,11 +408,8 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2)  {
 define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2)  {
 ; CHECK-LABEL: concat_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v8f16:
@@ -505,10 +478,7 @@ define void @concat_v32f16(ptr %a, ptr %b, ptr %c) {
 define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2)  {
 ; CHECK-LABEL: concat_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2f32:
@@ -530,11 +500,8 @@ define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2)  {
 define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2)  {
 ; CHECK-LABEL: concat_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4f32:
@@ -600,11 +567,8 @@ define void @concat_v16f32(ptr %a, ptr %b, ptr %c) {
 define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2)  {
 ; CHECK-LABEL: concat_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index e433786cfdd1f..dc17513daf330 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -10,7 +10,6 @@ define <8 x i16> @load_zext_v8i8i16(ptr %ap)  {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v8i8i16:
@@ -48,7 +47,6 @@ define <4 x i32> @load_zext_v4i16i32(ptr %ap)  {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v4i16i32:
@@ -76,7 +74,6 @@ define <2 x i64> @load_zext_v2i32i64(ptr %ap) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v2i32i64:
@@ -133,15 +130,14 @@ define <16 x i32> @load_sext_v16i8i32(ptr %ap)  {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    mov w8, #4 // =0x4
 ; CHECK-NEXT:    mov w9, #8 // =0x8
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    mov w10, #12 // =0xc
-; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, x8]
-; CHECK-NEXT:    ld1sb { z2.s }, p0/z, [x0, x9]
-; CHECK-NEXT:    ld1sb { z3.s }, p0/z, [x0, x10]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
-; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $z2
-; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT:    mov w8, w9
+; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1sb { z2.s }, p0/z, [x0, x8]
+; CHECK-NEXT:    mov w8, w10
+; CHECK-NEXT:    ld1sb { z3.s }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_sext_v16i8i32:
@@ -191,8 +187,6 @@ define <8 x i32> @load_sext_v8i16i32(ptr %ap)  {
 ; CHECK-NEXT:    mov x8, #4 // =0x4
 ; CHECK-NEXT:    ld1sh { z1.s }, p0/z, [x0, x8, lsl #1]
 ; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_sext_v8i16i32:
@@ -327,14 +321,6 @@ define <16 x i64> @load_zext_v16i16i64(ptr %ap)  {
 ; CHECK-NEXT:    ld1h { z5.d }, p0/z, [x0, x9, lsl #1]
 ; CHECK-NEXT:    ld1h { z6.d }, p0/z, [x0, x8, lsl #1]
 ; CHECK-NEXT:    ld1h { z7.d }, p0/z, [x0, x10, lsl #1]
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
-; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $z2
-; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $z3
-; CHECK-NEXT:    // kill: def $q4 killed $q4 killed $z4
-; CHECK-NEXT:    // kill: def $q5 killed $q5 killed $z5
-; CHECK-NEXT:    // kill: def $q6 killed $q6 killed $z6
-; CHECK-NEXT:    // kill: def $q7 killed $q7 killed $z7
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v16i16i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index 35dd827bbabc5..bcbf10d848cde 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -10,7 +10,6 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) {
 ; CHECK-LABEL: extract_subvector_v8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.b, z0.b[7]
 ; CHECK-NEXT:    mov z2.b, z0.b[6]
 ; CHECK-NEXT:    mov z3.b, z0.b[5]
@@ -18,7 +17,6 @@ define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) {
 ; CHECK-NEXT:    zip1 z1.h, z2.h, z1.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8i1:
@@ -45,7 +43,6 @@ define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) {
 define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: extract_subvector_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.b, z0.b[7]
 ; CHECK-NEXT:    mov z2.b, z0.b[6]
 ; CHECK-NEXT:    mov z3.b, z0.b[5]
@@ -53,7 +50,6 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 ; CHECK-NEXT:    zip1 z1.h, z2.h, z1.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8i8:
@@ -78,9 +74,7 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: extract_subvector_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v16i8:
@@ -117,10 +111,8 @@ define void @extract_subvector_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: extract_subvector_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4i16:
@@ -138,9 +130,7 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
 define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: extract_subvector_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8i16:
@@ -177,9 +167,7 @@ define void @extract_subvector_v16i16(ptr %a, ptr %b) {
 define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: extract_subvector_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2i32:
@@ -198,9 +186,7 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
 define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: extract_subvector_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4i32:
@@ -237,9 +223,7 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: extract_subvector_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2i64:
@@ -276,9 +260,7 @@ define void @extract_subvector_v4i64(ptr %a, ptr %b) {
 define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: extract_subvector_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4f16:
@@ -297,9 +279,7 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
 define <4 x half> @extract_subvector_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: extract_subvector_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8f16:
@@ -336,9 +316,7 @@ define void @extract_subvector_v16f16(ptr %a, ptr %b) {
 define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: extract_subvector_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2f32:
@@ -357,9 +335,7 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
 define <2 x float> @extract_subvector_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: extract_subvector_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4f32:
@@ -396,9 +372,7 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) {
 define <1 x double> @extract_subvector_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: extract_subvector_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
index cf308e6c4395f..c444d89335094 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -12,9 +12,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define half @extractelement_v2f16(<2 x half> %op1) {
 ; CHECK-LABEL: extractelement_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v2f16:
@@ -32,9 +30,7 @@ define half @extractelement_v2f16(<2 x half> %op1) {
 define half @extractelement_v4f16(<4 x half> %op1) {
 ; CHECK-LABEL: extractelement_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[3]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v4f16:
@@ -52,9 +48,7 @@ define half @extractelement_v4f16(<4 x half> %op1) {
 define half @extractelement_v8f16(<8 x half> %op1) {
 ; CHECK-LABEL: extractelement_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v8f16:
@@ -73,7 +67,6 @@ define half @extractelement_v16f16(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
-; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v16f16:
@@ -92,9 +85,7 @@ define half @extractelement_v16f16(ptr %a) {
 define float @extractelement_v2f32(<2 x float> %op1) {
 ; CHECK-LABEL: extractelement_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v2f32:
@@ -112,9 +103,7 @@ define float @extractelement_v2f32(<2 x float> %op1) {
 define float @extractelement_v4f32(<4 x float> %op1) {
 ; CHECK-LABEL: extractelement_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v4f32:
@@ -133,7 +122,6 @@ define float @extractelement_v8f32(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
-; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v8f32:
@@ -163,9 +151,7 @@ define double @extractelement_v1f64(<1 x double> %op1) {
 define double @extractelement_v2f64(<2 x double> %op1) {
 ; CHECK-LABEL: extractelement_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v2f64:
@@ -184,7 +170,6 @@ define double @extractelement_v4f64(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v4f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index da6b3bb99dbda..0a9baf7acd974 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=SVE
-; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=SVE2
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=SVE2
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=SVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=SVE2
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=SVE2
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
@@ -13,6 +13,8 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ldr h0, [x1]
 ; SVE-NEXT:    ldr h1, [x0]
+; SVE-NEXT:    // kill: def $z0 killed $h0
+; SVE-NEXT:    // kill: def $z1 killed $h1
 ; SVE-NEXT:    and z0.h, z0.h, #0x8000
 ; SVE-NEXT:    and z1.h, z1.h, #0x7fff
 ; SVE-NEXT:    orr z0.d, z1.d, z0.d
@@ -22,10 +24,12 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {
 ; SVE2-LABEL: test_copysign_f16:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
-; SVE2-NEXT:    ldr h1, [x1]
-; SVE2-NEXT:    ldr h2, [x0]
-; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
-; SVE2-NEXT:    str h2, [x0]
+; SVE2-NEXT:    ldr h1, [x0]
+; SVE2-NEXT:    ldr h2, [x1]
+; SVE2-NEXT:    // kill: def $z1 killed $h1
+; SVE2-NEXT:    // kill: def $z2 killed $h2
+; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    str h1, [x0]
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_copysign_f16:
@@ -57,6 +61,8 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ldr h0, [x1]
 ; SVE-NEXT:    ldr h1, [x0]
+; SVE-NEXT:    // kill: def $z0 killed $h0
+; SVE-NEXT:    // kill: def $z1 killed $h1
 ; SVE-NEXT:    and z0.h, z0.h, #0x8000
 ; SVE-NEXT:    and z1.h, z1.h, #0x7fff
 ; SVE-NEXT:    orr z0.d, z1.d, z0.d
@@ -66,10 +72,12 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
 ; SVE2-LABEL: test_copysign_bf16:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
-; SVE2-NEXT:    ldr h1, [x1]
-; SVE2-NEXT:    ldr h2, [x0]
-; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
-; SVE2-NEXT:    str h2, [x0]
+; SVE2-NEXT:    ldr h1, [x0]
+; SVE2-NEXT:    ldr h2, [x1]
+; SVE2-NEXT:    // kill: def $z1 killed $h1
+; SVE2-NEXT:    // kill: def $z2 killed $h2
+; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    str h1, [x0]
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_copysign_bf16:
@@ -78,6 +86,7 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    ldr h0, [x0]
 ; NONEON-NOSVE-NEXT:    ldr h1, [x1]
+; NONEON-NOSVE-NEXT:    // kill: def $s0 killed $h0
 ; NONEON-NOSVE-NEXT:    fmov w8, s0
 ; NONEON-NOSVE-NEXT:    str h1, [sp, #12]
 ; NONEON-NOSVE-NEXT:    lsl w8, w8, #16
@@ -105,6 +114,8 @@ define void @test_copysign_f32(ptr %ap, ptr %bp) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ldr s0, [x1]
 ; SVE-NEXT:    ldr s1, [x0]
+; SVE-NEXT:    // kill: def $z0 killed $s0
+; SVE-NEXT:    // kill: def $z1 killed $s1
 ; SVE-NEXT:    and z0.s, z0.s, #0x80000000
 ; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
 ; SVE-NEXT:    orr z0.d, z1.d, z0.d
@@ -114,10 +125,12 @@ define void @test_copysign_f32(ptr %ap, ptr %bp) {
 ; SVE2-LABEL: test_copysign_f32:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z0.s, #0x7fffffff
-; SVE2-NEXT:    ldr s1, [x1]
-; SVE2-NEXT:    ldr s2, [x0]
-; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
-; SVE2-NEXT:    str s2, [x0]
+; SVE2-NEXT:    ldr s1, [x0]
+; SVE2-NEXT:    ldr s2, [x1]
+; SVE2-NEXT:    // kill: def $z1 killed $s1
+; SVE2-NEXT:    // kill: def $z2 killed $s2
+; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    str s1, [x0]
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_copysign_f32:
@@ -142,6 +155,8 @@ define void @test_copysign_f64(ptr %ap, ptr %bp) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ldr d0, [x1]
 ; SVE-NEXT:    ldr d1, [x0]
+; SVE-NEXT:    // kill: def $z0 killed $d0
+; SVE-NEXT:    // kill: def $z1 killed $d1
 ; SVE-NEXT:    and z0.d, z0.d, #0x8000000000000000
 ; SVE-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
 ; SVE-NEXT:    orr z0.d, z1.d, z0.d
@@ -151,10 +166,12 @@ define void @test_copysign_f64(ptr %ap, ptr %bp) {
 ; SVE2-LABEL: test_copysign_f64:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z0.d, #0x7fffffffffffffff
-; SVE2-NEXT:    ldr d1, [x1]
-; SVE2-NEXT:    ldr d2, [x0]
-; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
-; SVE2-NEXT:    str d2, [x0]
+; SVE2-NEXT:    ldr d1, [x0]
+; SVE2-NEXT:    ldr d2, [x1]
+; SVE2-NEXT:    // kill: def $z1 killed $d1
+; SVE2-NEXT:    // kill: def $z2 killed $d2
+; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT:    str d1, [x0]
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_copysign_f64:
@@ -1013,10 +1030,10 @@ define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
 ; SVE2-NEXT:    fcvt z0.s, p0/m, z0.d
 ; SVE2-NEXT:    fcvt z1.s, p0/m, z1.d
 ; SVE2-NEXT:    ptrue p0.s, vl2
-; SVE2-NEXT:    uzp1 z3.s, z0.s, z0.s
-; SVE2-NEXT:    uzp1 z2.s, z1.s, z1.s
+; SVE2-NEXT:    uzp1 z2.s, z0.s, z0.s
+; SVE2-NEXT:    uzp1 z1.s, z1.s, z1.s
+; SVE2-NEXT:    splice z0.s, p0, { z1.s, z2.s }
 ; SVE2-NEXT:    mov z1.s, #0x7fffffff
-; SVE2-NEXT:    splice z0.s, p0, { z2.s, z3.s }
 ; SVE2-NEXT:    ldr q2, [x0]
 ; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
 ; SVE2-NEXT:    str q2, [x0]
@@ -1408,10 +1425,10 @@ define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
 ; SVE2-NEXT:    fcvt z0.h, p0/m, z0.s
 ; SVE2-NEXT:    fcvt z1.h, p0/m, z1.s
 ; SVE2-NEXT:    ptrue p0.h, vl4
-; SVE2-NEXT:    uzp1 z3.h, z0.h, z0.h
-; SVE2-NEXT:    uzp1 z2.h, z1.h, z1.h
+; SVE2-NEXT:    uzp1 z2.h, z0.h, z0.h
+; SVE2-NEXT:    uzp1 z1.h, z1.h, z1.h
+; SVE2-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; SVE2-NEXT:    mov z1.h, #32767 // =0x7fff
-; SVE2-NEXT:    splice z0.h, p0, { z2.h, z3.h }
 ; SVE2-NEXT:    ldr q2, [x0]
 ; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
 ; SVE2-NEXT:    str q2, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
index 2f708cbda1f2b..16d8dbf6e8111 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -13,10 +13,7 @@ define <2 x half> @fadd_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fadd_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v2f16:
@@ -63,10 +60,7 @@ define <4 x half> @fadd_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fadd_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v4f16:
@@ -113,10 +107,7 @@ define <8 x half> @fadd_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fadd_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v8f16:
@@ -332,10 +323,7 @@ define <2 x float> @fadd_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fadd_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v2f32:
@@ -360,10 +348,7 @@ define <4 x float> @fadd_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fadd_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v4f32:
@@ -447,10 +432,7 @@ define <2 x double> @fadd_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fadd_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v2f64:
@@ -520,10 +502,7 @@ define <2 x half> @fdiv_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fdiv_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v2f16:
@@ -570,10 +549,7 @@ define <4 x half> @fdiv_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fdiv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v4f16:
@@ -620,10 +596,7 @@ define <8 x half> @fdiv_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fdiv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v8f16:
@@ -839,10 +812,7 @@ define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fdiv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v2f32:
@@ -867,10 +837,7 @@ define <4 x float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fdiv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v4f32:
@@ -954,10 +921,7 @@ define <2 x double> @fdiv_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fdiv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v2f64:
@@ -1027,11 +991,7 @@ define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3)
 ; CHECK-LABEL: fma_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f16:
@@ -1087,11 +1047,7 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
 ; CHECK-LABEL: fma_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v4f16:
@@ -1147,11 +1103,7 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
 ; CHECK-LABEL: fma_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v8f16:
@@ -1421,11 +1373,7 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o
 ; CHECK-LABEL: fma_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f32:
@@ -1452,11 +1400,7 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o
 ; CHECK-LABEL: fma_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v4f32:
@@ -1552,11 +1496,7 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double
 ; CHECK-LABEL: fma_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f64:
@@ -1635,10 +1575,7 @@ define <2 x half> @fmul_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fmul_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v2f16:
@@ -1685,10 +1622,7 @@ define <4 x half> @fmul_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmul_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v4f16:
@@ -1735,10 +1669,7 @@ define <8 x half> @fmul_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmul_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v8f16:
@@ -1954,10 +1885,7 @@ define <2 x float> @fmul_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmul_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v2f32:
@@ -1982,10 +1910,7 @@ define <4 x float> @fmul_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmul_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v4f32:
@@ -2069,10 +1994,7 @@ define <2 x double> @fmul_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmul_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v2f64:
@@ -2142,9 +2064,7 @@ define <2 x half> @fneg_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fneg_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v2f16:
@@ -2182,9 +2102,7 @@ define <4 x half> @fneg_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fneg_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v4f16:
@@ -2222,9 +2140,7 @@ define <8 x half> @fneg_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fneg_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v8f16:
@@ -2387,9 +2303,7 @@ define <2 x float> @fneg_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fneg_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v2f32:
@@ -2412,9 +2326,7 @@ define <4 x float> @fneg_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fneg_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v4f32:
@@ -2487,9 +2399,7 @@ define <2 x double> @fneg_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fneg_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fneg z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v2f64:
@@ -2551,9 +2461,7 @@ define <2 x half> @fsqrt_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fsqrt_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v2f16:
@@ -2591,9 +2499,7 @@ define <4 x half> @fsqrt_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fsqrt_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v4f16:
@@ -2631,9 +2537,7 @@ define <8 x half> @fsqrt_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fsqrt_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v8f16:
@@ -2796,9 +2700,7 @@ define <2 x float> @fsqrt_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fsqrt_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v2f32:
@@ -2821,9 +2723,7 @@ define <4 x float> @fsqrt_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fsqrt_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v4f32:
@@ -2896,9 +2796,7 @@ define <2 x double> @fsqrt_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fsqrt_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fsqrt z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v2f64:
@@ -2960,10 +2858,7 @@ define <2 x half> @fsub_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fsub_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v2f16:
@@ -3010,10 +2905,7 @@ define <4 x half> @fsub_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fsub_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v4f16:
@@ -3060,10 +2952,7 @@ define <8 x half> @fsub_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fsub_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v8f16:
@@ -3279,10 +3168,7 @@ define <2 x float> @fsub_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fsub_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v2f32:
@@ -3307,10 +3193,7 @@ define <4 x float> @fsub_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fsub_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v4f32:
@@ -3394,10 +3277,7 @@ define <2 x double> @fsub_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fsub_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v2f64:
@@ -3467,9 +3347,7 @@ define <2 x half> @fabs_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fabs_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v2f16:
@@ -3507,9 +3385,7 @@ define <4 x half> @fabs_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fabs_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v4f16:
@@ -3547,9 +3423,7 @@ define <8 x half> @fabs_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fabs_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v8f16:
@@ -3712,9 +3586,7 @@ define <2 x float> @fabs_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fabs_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v2f32:
@@ -3737,9 +3609,7 @@ define <4 x float> @fabs_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fabs_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v4f32:
@@ -3812,9 +3682,7 @@ define <2 x double> @fabs_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fabs_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fabs z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v2f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
index a8220d9993227..72ea2f9bf3d87 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -13,12 +13,9 @@ define <2 x i16> @fcmp_oeq_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v2f16:
@@ -51,11 +48,8 @@ define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v4f16:
@@ -103,11 +97,8 @@ define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v8f16:
@@ -327,11 +318,8 @@ define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v2f32:
@@ -358,11 +346,8 @@ define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v4f32:
@@ -458,7 +443,6 @@ define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) {
 ; CHECK-NEXT:    fcmp d0, d1
 ; CHECK-NEXT:    csetm x8, eq
 ; CHECK-NEXT:    mov z0.d, x8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v1f64:
@@ -480,11 +464,8 @@ define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcmeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v2f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index 41a6cdc7df4dd..6c29666890bef 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -12,9 +12,8 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @fcvt_v2f16_to_v2f32(<2 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v2f16_to_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -40,9 +39,8 @@ define void @fcvt_v2f16_to_v2f32(<2 x half> %a, ptr %b) {
 define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v4f16_to_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
@@ -74,11 +72,10 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
 define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v8f16_to_v8f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
 ; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
@@ -123,15 +120,13 @@ define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
 define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v16f16_to_v16f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z1
 ; CHECK-NEXT:    ext z2.b, z2.b, z1.b, #8
 ; CHECK-NEXT:    movprfx z3, z0
 ; CHECK-NEXT:    ext z3.b, z3.b, z0.b, #8
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
@@ -1068,7 +1063,6 @@ define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) {
 ; CHECK-LABEL: fcvt_v1f64_v1f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
 ; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -1087,7 +1081,6 @@ define void @fcvt_v2f64_v2f32(<2 x double> %op1, ptr %b) {
 ; CHECK-LABEL: fcvt_v2f64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
 ; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
index e53d6a9081154..e5266eb95f697 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -13,10 +13,7 @@ define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmaxnm_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v4f16:
@@ -63,10 +60,7 @@ define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmaxnm_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v8f16:
@@ -282,10 +276,7 @@ define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmaxnm_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v2f32:
@@ -310,10 +301,7 @@ define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmaxnm_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v4f32:
@@ -416,10 +404,7 @@ define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmaxnm_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v2f64:
@@ -489,10 +474,7 @@ define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fminnm_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v4f16:
@@ -539,10 +521,7 @@ define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fminnm_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v8f16:
@@ -758,10 +737,7 @@ define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fminnm_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v2f32:
@@ -786,10 +762,7 @@ define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fminnm_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v4f32:
@@ -892,10 +865,7 @@ define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fminnm_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v2f64:
@@ -965,10 +935,7 @@ define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmax_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v4f16:
@@ -1015,10 +982,7 @@ define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmax_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v8f16:
@@ -1234,10 +1198,7 @@ define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmax_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v2f32:
@@ -1262,10 +1223,7 @@ define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmax_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v4f32:
@@ -1368,10 +1326,7 @@ define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmax_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v2f64:
@@ -1441,10 +1396,7 @@ define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmin_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v4f16:
@@ -1491,10 +1443,7 @@ define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmin_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v8f16:
@@ -1710,10 +1659,7 @@ define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmin_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v2f32:
@@ -1738,10 +1684,7 @@ define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmin_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v4f32:
@@ -1844,10 +1787,7 @@ define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmin_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v2f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
index 03bc39a6ef3ee..443cb93aa8ca1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -13,9 +13,7 @@ define <2 x half> @frintp_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintp_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v2f16:
@@ -53,9 +51,7 @@ define <4 x half> @frintp_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintp_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v4f16:
@@ -93,9 +89,7 @@ define <8 x half> @frintp_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintp_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v8f16:
@@ -258,9 +252,7 @@ define <2 x float> @frintp_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintp_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v2f32:
@@ -283,9 +275,7 @@ define <4 x float> @frintp_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintp_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v4f32:
@@ -377,9 +367,7 @@ define <2 x double> @frintp_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintp_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintp z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v2f64:
@@ -441,9 +429,7 @@ define <2 x half> @frintm_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintm_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v2f16:
@@ -481,9 +467,7 @@ define <4 x half> @frintm_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintm_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v4f16:
@@ -521,9 +505,7 @@ define <8 x half> @frintm_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintm_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v8f16:
@@ -686,9 +668,7 @@ define <2 x float> @frintm_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintm_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v2f32:
@@ -711,9 +691,7 @@ define <4 x float> @frintm_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintm_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v4f32:
@@ -805,9 +783,7 @@ define <2 x double> @frintm_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintm_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintm z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v2f64:
@@ -869,9 +845,7 @@ define <2 x half> @frinti_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frinti_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v2f16:
@@ -909,9 +883,7 @@ define <4 x half> @frinti_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frinti_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v4f16:
@@ -949,9 +921,7 @@ define <8 x half> @frinti_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frinti_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v8f16:
@@ -1114,9 +1084,7 @@ define <2 x float> @frinti_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frinti_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v2f32:
@@ -1139,9 +1107,7 @@ define <4 x float> @frinti_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frinti_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v4f32:
@@ -1233,9 +1199,7 @@ define <2 x double> @frinti_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frinti_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinti z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v2f64:
@@ -1297,9 +1261,7 @@ define <2 x half> @frintx_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintx_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v2f16:
@@ -1337,9 +1299,7 @@ define <4 x half> @frintx_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintx_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v4f16:
@@ -1377,9 +1337,7 @@ define <8 x half> @frintx_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintx_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v8f16:
@@ -1542,9 +1500,7 @@ define <2 x float> @frintx_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintx_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v2f32:
@@ -1567,9 +1523,7 @@ define <4 x float> @frintx_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintx_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v4f32:
@@ -1661,9 +1615,7 @@ define <2 x double> @frintx_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintx_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v2f64:
@@ -1725,9 +1677,7 @@ define <2 x half> @frinta_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frinta_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v2f16:
@@ -1765,9 +1715,7 @@ define <4 x half> @frinta_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frinta_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v4f16:
@@ -1805,9 +1753,7 @@ define <8 x half> @frinta_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frinta_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v8f16:
@@ -1970,9 +1916,7 @@ define <2 x float> @frinta_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frinta_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v2f32:
@@ -1995,9 +1939,7 @@ define <4 x float> @frinta_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frinta_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v4f32:
@@ -2089,9 +2031,7 @@ define <2 x double> @frinta_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frinta_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinta z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v2f64:
@@ -2153,9 +2093,7 @@ define <2 x half> @frintn_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintn_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v2f16:
@@ -2193,9 +2131,7 @@ define <4 x half> @frintn_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintn_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v4f16:
@@ -2233,9 +2169,7 @@ define <8 x half> @frintn_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintn_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v8f16:
@@ -2398,9 +2332,7 @@ define <2 x float> @frintn_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintn_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v2f32:
@@ -2423,9 +2355,7 @@ define <4 x float> @frintn_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintn_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v4f32:
@@ -2517,9 +2447,7 @@ define <2 x double> @frintn_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintn_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintn z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v2f64:
@@ -2581,9 +2509,7 @@ define <2 x half> @frintz_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintz_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v2f16:
@@ -2621,9 +2547,7 @@ define <4 x half> @frintz_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintz_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v4f16:
@@ -2661,9 +2585,7 @@ define <8 x half> @frintz_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintz_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v8f16:
@@ -2826,9 +2748,7 @@ define <2 x float> @frintz_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintz_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v2f32:
@@ -2851,9 +2771,7 @@ define <4 x float> @frintz_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintz_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v4f32:
@@ -2945,9 +2863,7 @@ define <2 x double> @frintz_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintz_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintz z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v2f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index bcc446d9d1a41..eced98dda447a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -10,12 +10,9 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f16:
@@ -52,12 +49,9 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f16:
@@ -94,12 +88,9 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8f16:
@@ -254,12 +245,9 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f32:
@@ -286,12 +274,9 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f32:
@@ -405,15 +390,11 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask
 define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask) {
 ; CHECK-LABEL: select_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f64:
@@ -437,7 +418,6 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
 define void @select_v4f64(ptr %a, ptr %b, i1 %mask) {
 ; CHECK-LABEL: select_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index c43e929f47848..49b207016348d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -1,25 +1,21 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
 define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    mov z3.s, z2.s[1]
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z2.h, z2.h, z3.h
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f16:
@@ -53,16 +49,12 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask
 define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f16:
@@ -109,17 +101,13 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask
 define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask) {
 ; CHECK-LABEL: select_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.h, z2.b
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8f16:
@@ -338,16 +326,12 @@ define void @select_v16f16(ptr %a, ptr %b) {
 define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f32:
@@ -376,17 +360,13 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m
 define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f32:
@@ -513,17 +493,13 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
 define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    lsl z2.d, z2.d, #63
 ; CHECK-NEXT:    asr z2.d, z2.d, #63
 ; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f64:
@@ -532,12 +508,14 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1>
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
 ; NONEON-NOSVE-NEXT:    str d2, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #8]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #16]
+; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #1
 ; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
-; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #1
 ; NONEON-NOSVE-NEXT:    cmp x8, #0
-; NONEON-NOSVE-NEXT:    sbfx x8, x9, #0, #1
+; NONEON-NOSVE-NEXT:    mov w8, w9
+; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #1
 ; NONEON-NOSVE-NEXT:    fcsel d3, d2, d0, ne
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    cmp x8, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
index 02b5469c0ff85..63b0242264678 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
-; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -12,10 +12,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i8> @add_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: add_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v4i8:
@@ -49,10 +46,7 @@ define <4 x i8> @add_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @add_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: add_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.b, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v8i8:
@@ -102,10 +96,7 @@ define <8 x i8> @add_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @add_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: add_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.b, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v16i8:
@@ -343,10 +334,7 @@ define void @add_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @add_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: add_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v2i16:
@@ -371,10 +359,7 @@ define <2 x i16> @add_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @add_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: add_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v4i16:
@@ -408,10 +393,7 @@ define <4 x i16> @add_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @add_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: add_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v8i16:
@@ -553,10 +535,7 @@ define void @add_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @add_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: add_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v2i32:
@@ -581,10 +560,7 @@ define <2 x i32> @add_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @add_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: add_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v4i32:
@@ -672,10 +648,7 @@ define void @add_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: add_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v1i64:
@@ -696,10 +669,7 @@ define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @add_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: add_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v2i64:
@@ -771,18 +741,12 @@ define <4 x i8> @mul_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; SVE-LABEL: mul_v4i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v4i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v4i8:
@@ -817,18 +781,12 @@ define <8 x i8> @mul_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; SVE-LABEL: mul_v8i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl8
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v8i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v8i8:
@@ -879,18 +837,12 @@ define <16 x i8> @mul_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; SVE-LABEL: mul_v16i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl16
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v16i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v16i8:
@@ -1139,18 +1091,12 @@ define <2 x i16> @mul_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; SVE-LABEL: mul_v2i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v2i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i16:
@@ -1175,18 +1121,12 @@ define <4 x i16> @mul_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; SVE-LABEL: mul_v4i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v4i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v4i16:
@@ -1221,18 +1161,12 @@ define <8 x i16> @mul_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; SVE-LABEL: mul_v8i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl8
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v8i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v8i16:
@@ -1385,18 +1319,12 @@ define <2 x i32> @mul_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; SVE-LABEL: mul_v2i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v2i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i32:
@@ -1421,18 +1349,12 @@ define <4 x i32> @mul_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; SVE-LABEL: mul_v4i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl4
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v4i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v4i32:
@@ -1525,18 +1447,12 @@ define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; SVE-LABEL: mul_v1i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v1i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v1i64:
@@ -1558,18 +1474,12 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE-LABEL: mul_v2i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl2
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v2i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i64:
@@ -1647,10 +1557,7 @@ define void @mul_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @sub_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: sub_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v4i8:
@@ -1684,10 +1591,7 @@ define <4 x i8> @sub_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @sub_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: sub_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.b, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v8i8:
@@ -1737,10 +1641,7 @@ define <8 x i8> @sub_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @sub_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: sub_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.b, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v16i8:
@@ -1978,10 +1879,7 @@ define void @sub_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @sub_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: sub_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v2i16:
@@ -2006,10 +1904,7 @@ define <2 x i16> @sub_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @sub_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: sub_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v4i16:
@@ -2043,10 +1938,7 @@ define <4 x i16> @sub_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @sub_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: sub_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v8i16:
@@ -2188,10 +2080,7 @@ define void @sub_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @sub_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: sub_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v2i32:
@@ -2216,10 +2105,7 @@ define <2 x i32> @sub_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @sub_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: sub_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v4i32:
@@ -2307,10 +2193,7 @@ define void @sub_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: sub_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v1i64:
@@ -2331,10 +2214,7 @@ define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @sub_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: sub_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v2i64:
@@ -2406,10 +2286,8 @@ define <4 x i8> @abs_v4i8(<4 x i8> %op1) {
 ; CHECK-LABEL: abs_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    abs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v4i8:
@@ -2443,9 +2321,7 @@ define <8 x i8> @abs_v8i8(<8 x i8> %op1) {
 ; CHECK-LABEL: abs_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v8i8:
@@ -2495,9 +2371,7 @@ define <16 x i8> @abs_v16i8(<16 x i8> %op1) {
 ; CHECK-LABEL: abs_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v16i8:
@@ -2732,10 +2606,8 @@ define <2 x i16> @abs_v2i16(<2 x i16> %op1) {
 ; CHECK-LABEL: abs_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    abs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v2i16:
@@ -2760,9 +2632,7 @@ define <4 x i16> @abs_v4i16(<4 x i16> %op1) {
 ; CHECK-LABEL: abs_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v4i16:
@@ -2796,9 +2666,7 @@ define <8 x i16> @abs_v8i16(<8 x i16> %op1) {
 ; CHECK-LABEL: abs_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v8i16:
@@ -2937,9 +2805,7 @@ define <2 x i32> @abs_v2i32(<2 x i32> %op1) {
 ; CHECK-LABEL: abs_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v2i32:
@@ -2964,9 +2830,7 @@ define <4 x i32> @abs_v4i32(<4 x i32> %op1) {
 ; CHECK-LABEL: abs_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v4i32:
@@ -3051,9 +2915,7 @@ define <1 x i64> @abs_v1i64(<1 x i64> %op1) {
 ; CHECK-LABEL: abs_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v1i64:
@@ -3075,9 +2937,7 @@ define <2 x i64> @abs_v2i64(<2 x i64> %op1) {
 ; CHECK-LABEL: abs_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
index ba20de65a253a..b5c8461bf8a78 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -13,11 +13,8 @@ define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: icmp_eq_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v8i8:
@@ -77,11 +74,8 @@ define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: icmp_eq_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v16i8:
@@ -373,11 +367,8 @@ define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: icmp_eq_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v4i16:
@@ -417,11 +408,8 @@ define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: icmp_eq_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v8i16:
@@ -593,11 +581,8 @@ define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: icmp_eq_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v2i32:
@@ -625,11 +610,8 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: icmp_eq_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v4i32:
@@ -729,11 +711,8 @@ define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: icmp_eq_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v1i64:
@@ -757,11 +736,8 @@ define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: icmp_eq_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 8e1d61b51e2bb..8fa3c00f506a6 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -13,8 +13,6 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: sdiv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -22,7 +20,6 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i8:
@@ -56,11 +53,9 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z1.h, z1.b
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
@@ -74,7 +69,6 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i8:
@@ -124,18 +118,16 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.h, z1.b
 ; CHECK-NEXT:    sunpklo z3.h, z0.b
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    sunpklo z1.h, z1.b
 ; CHECK-NEXT:    sunpklo z4.s, z2.h
 ; CHECK-NEXT:    sunpklo z5.s, z3.h
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    sunpklo z1.h, z1.b
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z2.s, z2.h
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
@@ -148,19 +140,18 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z1.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v16i8:
@@ -266,6 +257,7 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z4.h, z2.b
 ; CHECK-NEXT:    sunpklo z2.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    sunpklo z5.s, z4.h
 ; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
@@ -274,6 +266,7 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q5, [x0]
 ; CHECK-NEXT:    sunpklo z16.h, z5.b
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    sunpklo z5.h, z5.b
 ; CHECK-NEXT:    sunpklo z18.s, z16.h
 ; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
@@ -282,34 +275,32 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z18.s, z5.h
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
 ; CHECK-NEXT:    sunpklo z5.s, z5.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
 ; CHECK-NEXT:    sunpklo z16.s, z6.h
 ; CHECK-NEXT:    ext z6.b, z6.b, z6.b, #8
 ; CHECK-NEXT:    sunpklo z6.s, z6.h
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
 ; CHECK-NEXT:    sdivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT:    uzp1 z18.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z19.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z21.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
 ; CHECK-NEXT:    sdiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z6.h, z17.h, z17.h
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    uzp1 z4.h, z16.h, z16.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z3.b, z6.b, z6.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    stp q0, q1, [x0]
+; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    stp q2, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v32i8:
@@ -463,12 +454,9 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i16:
@@ -493,14 +481,11 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i16:
@@ -534,13 +519,11 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
@@ -549,7 +532,6 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i16:
@@ -617,14 +599,14 @@ define void @sdiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sdivr z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT:    sdiv z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z2.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -715,10 +697,7 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i32:
@@ -743,10 +722,7 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i32:
@@ -830,10 +806,7 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v1i64:
@@ -855,10 +828,7 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i64:
@@ -925,16 +895,13 @@ define void @sdiv_v4i64(ptr %a, ptr %b)  {
 define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: udiv_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v4i8:
@@ -968,11 +935,9 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: udiv_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.h, z1.b
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
@@ -986,7 +951,6 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v8i8:
@@ -1036,18 +1000,16 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: udiv_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.h, z1.b
 ; CHECK-NEXT:    uunpklo z3.h, z0.b
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z1.h, z1.b
 ; CHECK-NEXT:    uunpklo z4.s, z2.h
 ; CHECK-NEXT:    uunpklo z5.s, z3.h
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    uunpklo z1.h, z1.b
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
@@ -1060,19 +1022,18 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z1.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v16i8:
@@ -1178,6 +1139,7 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uunpklo z4.h, z2.b
 ; CHECK-NEXT:    uunpklo z2.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uunpklo z5.s, z4.h
 ; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
@@ -1186,6 +1148,7 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q5, [x0]
 ; CHECK-NEXT:    uunpklo z16.h, z5.b
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uunpklo z5.h, z5.b
 ; CHECK-NEXT:    uunpklo z18.s, z16.h
 ; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
@@ -1194,34 +1157,32 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uunpklo z18.s, z5.h
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
 ; CHECK-NEXT:    uunpklo z5.s, z5.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
 ; CHECK-NEXT:    uunpklo z16.s, z6.h
 ; CHECK-NEXT:    ext z6.b, z6.b, z6.b, #8
 ; CHECK-NEXT:    uunpklo z6.s, z6.h
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
 ; CHECK-NEXT:    udivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT:    uzp1 z18.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z19.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z21.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
 ; CHECK-NEXT:    udiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    uzp1 z0.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z6.h, z17.h, z17.h
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT:    uzp1 z4.h, z16.h, z16.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z3.b, z6.b, z6.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    stp q0, q1, [x0]
+; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    stp q2, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v32i8:
@@ -1374,13 +1335,10 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: udiv_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v2i16:
@@ -1405,14 +1363,11 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: udiv_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v4i16:
@@ -1446,13 +1401,11 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: udiv_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
@@ -1461,7 +1414,6 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v8i16:
@@ -1529,14 +1481,14 @@ define void @udiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT:    udiv z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z2.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -1627,10 +1579,7 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: udiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v2i32:
@@ -1655,10 +1604,7 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: udiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v4i32:
@@ -1742,10 +1688,7 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: udiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v1i64:
@@ -1767,10 +1710,7 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: udiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v2i64:
@@ -1858,12 +1798,15 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
 ; NONEON-NOSVE-NEXT:    add w9, w10, w9, lsr #1
 ; NONEON-NOSVE-NEXT:    lsr w11, w9, #6
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
@@ -1871,12 +1814,14 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; NONEON-NOSVE-NEXT:    lsr w9, w9, #6
 ; NONEON-NOSVE-NEXT:    stp w9, w11, [sp, #56]
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
 ; NONEON-NOSVE-NEXT:    add w9, w10, w9, lsr #1
 ; NONEON-NOSVE-NEXT:    lsr w11, w9, #6
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
@@ -1884,12 +1829,14 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; NONEON-NOSVE-NEXT:    lsr w9, w9, #6
 ; NONEON-NOSVE-NEXT:    stp w9, w11, [sp, #48]
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
 ; NONEON-NOSVE-NEXT:    add w9, w10, w9, lsr #1
 ; NONEON-NOSVE-NEXT:    lsr w11, w9, #6
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
@@ -1897,18 +1844,20 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; NONEON-NOSVE-NEXT:    lsr w9, w9, #6
 ; NONEON-NOSVE-NEXT:    stp w9, w11, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #4]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
 ; NONEON-NOSVE-NEXT:    add w9, w10, w9, lsr #1
-; NONEON-NOSVE-NEXT:    lsr w11, w9, #6
+; NONEON-NOSVE-NEXT:    lsr w10, w9, #6
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x8, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w8
 ; NONEON-NOSVE-NEXT:    add w8, w8, w9, lsr #1
 ; NONEON-NOSVE-NEXT:    lsr w8, w8, #6
-; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp w8, w10, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #64
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index f290e3bfd6e93..9ab036ab4757d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
-; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=CHECK,SVE2
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -16,7 +16,6 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i1_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
@@ -73,7 +72,6 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
 define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i3_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
@@ -94,12 +92,16 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
 ; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp]
 ; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    ldp w11, w10, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #3
+; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
 ; NONEON-NOSVE-NEXT:    sbfx x9, x9, #0, #3
 ; NONEON-NOSVE-NEXT:    sbfx x10, x10, #0, #3
 ; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #48]
-; NONEON-NOSVE-NEXT:    sbfx x8, x11, #0, #3
+; NONEON-NOSVE-NEXT:    mov w8, w11
+; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #3
 ; NONEON-NOSVE-NEXT:    stp x8, x10, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0]
@@ -117,7 +119,6 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
 define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v16i8_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
@@ -372,7 +373,6 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
 define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i8_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
@@ -410,7 +410,6 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v16i8_v16i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
@@ -420,8 +419,8 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    movprfx z3, z1
 ; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
-; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z2.s, z2.h
+; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    stp q0, q2, [x0]
 ; CHECK-NEXT:    stp q1, q3, [x0, #32]
@@ -675,9 +674,8 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
 define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i8_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
@@ -712,7 +710,6 @@ define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i8_v8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
@@ -761,7 +758,6 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v16i8_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
@@ -776,12 +772,12 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
+; CHECK-NEXT:    sunpklo z4.d, z4.s
 ; CHECK-NEXT:    movprfx z5, z3
 ; CHECK-NEXT:    ext z5.b, z5.b, z3.b, #8
-; CHECK-NEXT:    sunpklo z3.d, z3.s
-; CHECK-NEXT:    sunpklo z4.d, z4.s
 ; CHECK-NEXT:    movprfx z6, z2
 ; CHECK-NEXT:    ext z6.b, z6.b, z2.b, #8
+; CHECK-NEXT:    sunpklo z3.d, z3.s
 ; CHECK-NEXT:    sunpklo z2.d, z2.s
 ; CHECK-NEXT:    sunpklo z5.d, z5.s
 ; CHECK-NEXT:    sunpklo z6.d, z6.s
@@ -789,9 +785,9 @@ define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z1.d, z1.s
-; CHECK-NEXT:    stp q3, q5, [x0, #64]
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    stp q2, q6, [x0, #32]
+; CHECK-NEXT:    stp q3, q5, [x0, #64]
 ; CHECK-NEXT:    stp q1, q0, [x0, #96]
 ; CHECK-NEXT:    ret
 ;
@@ -932,125 +928,157 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    .cfi_offset w29, -96
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #17]
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #17]
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #18]
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #19]
+; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #47]
-; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #19]
-; NONEON-NOSVE-NEXT:    add w16, w16, w16
-; NONEON-NOSVE-NEXT:    add w17, w17, w17
-; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
+; NONEON-NOSVE-NEXT:    add w0, w14, w14
+; NONEON-NOSVE-NEXT:    add w7, w16, w16
+; NONEON-NOSVE-NEXT:    add w16, w17, w17
+; NONEON-NOSVE-NEXT:    add w17, w18, w18
+; NONEON-NOSVE-NEXT:    mov w18, w0
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    sxtb x19, w17
-; NONEON-NOSVE-NEXT:    sxtb x20, w16
+; NONEON-NOSVE-NEXT:    sxtb x19, w18
+; NONEON-NOSVE-NEXT:    // kill: def $x17 killed $w17
+; NONEON-NOSVE-NEXT:    sxtb x20, w17
+; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    add w7, w14, w14
-; NONEON-NOSVE-NEXT:    add w18, w15, w15
-; NONEON-NOSVE-NEXT:    sxtb x21, w18
+; NONEON-NOSVE-NEXT:    mov w18, w16
+; NONEON-NOSVE-NEXT:    // kill: def $x7 killed $w7
 ; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #22]
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #23]
 ; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #160]
 ; NONEON-NOSVE-NEXT:    add w19, w30, w30
-; NONEON-NOSVE-NEXT:    sxtb x7, w7
+; NONEON-NOSVE-NEXT:    sxtb x21, w18
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #23]
+; NONEON-NOSVE-NEXT:    sxtb x7, w7
+; NONEON-NOSVE-NEXT:    // kill: def $x19 killed $w19
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #25]
+; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #26]
 ; NONEON-NOSVE-NEXT:    sxtb x19, w19
 ; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    stp x7, x21, [sp, #144]
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
 ; NONEON-NOSVE-NEXT:    add w7, w29, w29
-; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #27]
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    stp x8, x19, [sp, #128]
 ; NONEON-NOSVE-NEXT:    add w19, w28, w28
-; NONEON-NOSVE-NEXT:    sxtb x7, w7
+; NONEON-NOSVE-NEXT:    // kill: def $x7 killed $w7
 ; NONEON-NOSVE-NEXT:    add w8, w27, w27
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x7, w7
+; NONEON-NOSVE-NEXT:    // kill: def $x19 killed $w19
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
+; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
 ; NONEON-NOSVE-NEXT:    sxtb x19, w19
 ; NONEON-NOSVE-NEXT:    sxtb x8, w8
-; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #29]
 ; NONEON-NOSVE-NEXT:    stp x7, x9, [sp, #112]
 ; NONEON-NOSVE-NEXT:    add w9, w26, w26
 ; NONEON-NOSVE-NEXT:    add w7, w25, w25
 ; NONEON-NOSVE-NEXT:    stp x8, x19, [sp, #96]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
+; NONEON-NOSVE-NEXT:    mov w8, w7
+; NONEON-NOSVE-NEXT:    add w19, w24, w24
 ; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w7
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    add w7, w23, w23
 ; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #31]
-; NONEON-NOSVE-NEXT:    add w19, w24, w24
-; NONEON-NOSVE-NEXT:    add w7, w23, w23
 ; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #80]
-; NONEON-NOSVE-NEXT:    sxtb x9, w19
-; NONEON-NOSVE-NEXT:    sxtb x8, w7
-; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #33]
+; NONEON-NOSVE-NEXT:    mov w9, w19
+; NONEON-NOSVE-NEXT:    mov w8, w7
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    add w19, w22, w22
 ; NONEON-NOSVE-NEXT:    add w6, w6, w6
+; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #33]
+; NONEON-NOSVE-NEXT:    add w4, w4, w4
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #64]
-; NONEON-NOSVE-NEXT:    sxtb x9, w19
-; NONEON-NOSVE-NEXT:    sxtb x8, w6
+; NONEON-NOSVE-NEXT:    mov w9, w19
+; NONEON-NOSVE-NEXT:    mov w8, w6
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    add w5, w5, w5
 ; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT:    add w5, w5, w5
-; NONEON-NOSVE-NEXT:    add w4, w4, w4
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #48]
-; NONEON-NOSVE-NEXT:    sxtb x9, w5
-; NONEON-NOSVE-NEXT:    sxtb x8, w4
 ; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #36]
-; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #37]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #48]
+; NONEON-NOSVE-NEXT:    mov w9, w5
+; NONEON-NOSVE-NEXT:    mov w8, w4
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    add w3, w3, w3
 ; NONEON-NOSVE-NEXT:    add w2, w2, w2
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #38]
+; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #37]
+; NONEON-NOSVE-NEXT:    add w18, w18, w18
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #288]
-; NONEON-NOSVE-NEXT:    sxtb x9, w3
-; NONEON-NOSVE-NEXT:    sxtb x8, w2
-; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #39]
+; NONEON-NOSVE-NEXT:    mov w9, w3
+; NONEON-NOSVE-NEXT:    mov w8, w2
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    add w0, w0, w0
-; NONEON-NOSVE-NEXT:    add w18, w18, w18
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #272]
-; NONEON-NOSVE-NEXT:    sxtb x9, w0
-; NONEON-NOSVE-NEXT:    sxtb x8, w18
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #38]
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #39]
 ; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #40]
-; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #41]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #272]
+; NONEON-NOSVE-NEXT:    mov w9, w0
+; NONEON-NOSVE-NEXT:    mov w8, w18
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    add w17, w17, w17
 ; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #41]
+; NONEON-NOSVE-NEXT:    add w14, w14, w14
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #256]
-; NONEON-NOSVE-NEXT:    sxtb x9, w17
-; NONEON-NOSVE-NEXT:    sxtb x8, w16
+; NONEON-NOSVE-NEXT:    mov w9, w17
+; NONEON-NOSVE-NEXT:    mov w8, w16
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
 ; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #43]
-; NONEON-NOSVE-NEXT:    add w15, w15, w15
-; NONEON-NOSVE-NEXT:    add w14, w14, w14
 ; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #44]
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #240]
-; NONEON-NOSVE-NEXT:    sxtb x9, w15
-; NONEON-NOSVE-NEXT:    sxtb x8, w14
-; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #45]
+; NONEON-NOSVE-NEXT:    mov w9, w15
+; NONEON-NOSVE-NEXT:    mov w8, w14
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    add w13, w13, w13
 ; NONEON-NOSVE-NEXT:    add w12, w12, w12
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #45]
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #224]
-; NONEON-NOSVE-NEXT:    sxtb x9, w13
-; NONEON-NOSVE-NEXT:    sxtb x8, w12
+; NONEON-NOSVE-NEXT:    mov w9, w13
+; NONEON-NOSVE-NEXT:    mov w8, w12
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    add w11, w11, w11
-; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #144]
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #208]
-; NONEON-NOSVE-NEXT:    sxtb x9, w11
+; NONEON-NOSVE-NEXT:    mov w9, w11
 ; NONEON-NOSVE-NEXT:    ldr w11, [sp, #8] // 4-byte Reload
-; NONEON-NOSVE-NEXT:    sxtb x8, w10
+; NONEON-NOSVE-NEXT:    mov w8, w10
 ; NONEON-NOSVE-NEXT:    ldr w10, [sp, #12] // 4-byte Reload
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    add w11, w11, w11
-; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #144]
 ; NONEON-NOSVE-NEXT:    add w10, w10, w10
-; NONEON-NOSVE-NEXT:    sxtb x11, w11
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #192]
-; NONEON-NOSVE-NEXT:    sxtb x8, w10
+; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
 ; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #112]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #192]
+; NONEON-NOSVE-NEXT:    mov w8, w10
+; NONEON-NOSVE-NEXT:    sxtb x11, w11
+; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    ldp q5, q4, [sp, #80]
-; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #176]
 ; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #176]
 ; NONEON-NOSVE-NEXT:    ldp q17, q16, [sp, #272]
 ; NONEON-NOSVE-NEXT:    ldp q18, q21, [sp, #176]
 ; NONEON-NOSVE-NEXT:    ldp q20, q19, [sp, #240]
@@ -1085,7 +1113,6 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
 define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
@@ -1219,7 +1246,6 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
 define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
@@ -1251,7 +1277,6 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i16_v8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
@@ -1261,8 +1286,8 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    movprfx z3, z1
 ; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
-; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    sunpklo z2.d, z2.s
+; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    sunpklo z3.d, z3.s
 ; CHECK-NEXT:    stp q0, q2, [x0]
 ; CHECK-NEXT:    stp q1, q3, [x0, #32]
@@ -1338,60 +1363,76 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #2]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #6]
+; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #2]
+; NONEON-NOSVE-NEXT:    ldrh w16, [sp]
+; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #6]
 ; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #4]
 ; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrh w5, [sp, #10]
 ; NONEON-NOSVE-NEXT:    add w13, w13, w13
 ; NONEON-NOSVE-NEXT:    add w16, w16, w16
-; NONEON-NOSVE-NEXT:    add w12, w12, w12
-; NONEON-NOSVE-NEXT:    sxth x16, w16
-; NONEON-NOSVE-NEXT:    sxth x13, w13
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    // kill: def $x13 killed $w13
+; NONEON-NOSVE-NEXT:    // kill: def $x16 killed $w16
 ; NONEON-NOSVE-NEXT:    add w3, w3, w3
-; NONEON-NOSVE-NEXT:    sxth x12, w12
+; NONEON-NOSVE-NEXT:    // kill: def $x15 killed $w15
 ; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #14]
-; NONEON-NOSVE-NEXT:    stp x13, x16, [sp, #80]
-; NONEON-NOSVE-NEXT:    sxth x13, w3
-; NONEON-NOSVE-NEXT:    add w16, w5, w5
-; NONEON-NOSVE-NEXT:    add w3, w4, w4
+; NONEON-NOSVE-NEXT:    sxth x13, w13
+; NONEON-NOSVE-NEXT:    sxth x16, w16
+; NONEON-NOSVE-NEXT:    sxth x15, w15
+; NONEON-NOSVE-NEXT:    // kill: def $x3 killed $w3
+; NONEON-NOSVE-NEXT:    add w0, w0, w0
 ; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #18]
-; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #64]
-; NONEON-NOSVE-NEXT:    sxth x12, w16
-; NONEON-NOSVE-NEXT:    sxth x13, w3
-; NONEON-NOSVE-NEXT:    add w16, w2, w2
-; NONEON-NOSVE-NEXT:    add w0, w0, w0
+; NONEON-NOSVE-NEXT:    stp x16, x13, [sp, #80]
+; NONEON-NOSVE-NEXT:    sxth x16, w3
+; NONEON-NOSVE-NEXT:    add w13, w5, w5
+; NONEON-NOSVE-NEXT:    add w3, w4, w4
+; NONEON-NOSVE-NEXT:    // kill: def $x13 killed $w13
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #48]
-; NONEON-NOSVE-NEXT:    sxth x12, w16
-; NONEON-NOSVE-NEXT:    sxth x13, w0
+; NONEON-NOSVE-NEXT:    stp x16, x15, [sp, #64]
+; NONEON-NOSVE-NEXT:    mov w16, w3
+; NONEON-NOSVE-NEXT:    sxth x13, w13
+; NONEON-NOSVE-NEXT:    sxth x16, w16
+; NONEON-NOSVE-NEXT:    add w15, w2, w2
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #20]
-; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #22]
-; NONEON-NOSVE-NEXT:    add w16, w18, w18
-; NONEON-NOSVE-NEXT:    add w17, w17, w17
-; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #32]
-; NONEON-NOSVE-NEXT:    sxth x12, w16
-; NONEON-NOSVE-NEXT:    sxth x13, w17
-; NONEON-NOSVE-NEXT:    add w15, w15, w15
-; NONEON-NOSVE-NEXT:    add w14, w14, w14
+; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #20]
+; NONEON-NOSVE-NEXT:    stp x16, x13, [sp, #48]
+; NONEON-NOSVE-NEXT:    mov w13, w15
+; NONEON-NOSVE-NEXT:    mov w16, w0
+; NONEON-NOSVE-NEXT:    sxth x13, w13
+; NONEON-NOSVE-NEXT:    sxth x16, w16
+; NONEON-NOSVE-NEXT:    add w15, w18, w18
+; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #22]
+; NONEON-NOSVE-NEXT:    add w12, w12, w12
 ; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    stp x16, x13, [sp, #32]
+; NONEON-NOSVE-NEXT:    mov w13, w15
+; NONEON-NOSVE-NEXT:    mov w15, w17
+; NONEON-NOSVE-NEXT:    sxth x13, w13
+; NONEON-NOSVE-NEXT:    sxth x15, w15
+; NONEON-NOSVE-NEXT:    add w14, w14, w14
 ; NONEON-NOSVE-NEXT:    add w10, w10, w10
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #144]
-; NONEON-NOSVE-NEXT:    sxth x12, w15
-; NONEON-NOSVE-NEXT:    sxth x13, w14
+; NONEON-NOSVE-NEXT:    stp x15, x13, [sp, #144]
+; NONEON-NOSVE-NEXT:    mov w13, w14
+; NONEON-NOSVE-NEXT:    // kill: def $x12 killed $w12
+; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
+; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
+; NONEON-NOSVE-NEXT:    sxth x13, w13
+; NONEON-NOSVE-NEXT:    sxth x12, w12
 ; NONEON-NOSVE-NEXT:    sxth x11, w11
 ; NONEON-NOSVE-NEXT:    sxth x10, w10
 ; NONEON-NOSVE-NEXT:    sxth x9, w9
 ; NONEON-NOSVE-NEXT:    sxth x8, w8
-; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #128]
+; NONEON-NOSVE-NEXT:    stp x12, x13, [sp, #128]
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #64]
 ; NONEON-NOSVE-NEXT:    stp x10, x11, [sp, #112]
 ; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #32]
@@ -1418,7 +1459,6 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
 define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i32_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
@@ -1473,26 +1513,34 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #24]
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
+; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #8]
 ; NONEON-NOSVE-NEXT:    sxtw x8, w8
 ; NONEON-NOSVE-NEXT:    sxtw x9, w9
-; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #8]
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
 ; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #48]
 ; NONEON-NOSVE-NEXT:    add w8, w15, w15
 ; NONEON-NOSVE-NEXT:    add w9, w14, w14
-; NONEON-NOSVE-NEXT:    sxtw x8, w8
-; NONEON-NOSVE-NEXT:    sxtw x9, w9
-; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    add w13, w13, w13
 ; NONEON-NOSVE-NEXT:    add w12, w12, w12
-; NONEON-NOSVE-NEXT:    add w10, w10, w10
+; NONEON-NOSVE-NEXT:    sxtw x11, w11
+; NONEON-NOSVE-NEXT:    sxtw x8, w8
+; NONEON-NOSVE-NEXT:    sxtw x9, w9
+; NONEON-NOSVE-NEXT:    // kill: def $x13 killed $w13
+; NONEON-NOSVE-NEXT:    // kill: def $x12 killed $w12
 ; NONEON-NOSVE-NEXT:    sxtw x13, w13
 ; NONEON-NOSVE-NEXT:    sxtw x12, w12
-; NONEON-NOSVE-NEXT:    sxtw x11, w11
 ; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #80]
-; NONEON-NOSVE-NEXT:    sxtw x8, w10
+; NONEON-NOSVE-NEXT:    mov w8, w10
+; NONEON-NOSVE-NEXT:    sxtw x8, w8
 ; NONEON-NOSVE-NEXT:    stp x12, x13, [sp, #32]
-; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
@@ -1512,7 +1560,6 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
 define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v16i8_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
@@ -1767,7 +1814,6 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
 define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i8_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
@@ -1805,7 +1851,6 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v16i8_v16i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
@@ -1815,8 +1860,8 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    movprfx z3, z1
 ; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
-; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    stp q0, q2, [x0]
 ; CHECK-NEXT:    stp q1, q3, [x0, #32]
@@ -2070,7 +2115,6 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
 define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v4i8_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    movprfx z1, z0
@@ -2105,7 +2149,6 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i8_v8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
@@ -2158,7 +2201,6 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v16i8_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
@@ -2173,12 +2215,12 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-NEXT:    ext z4.b, z4.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    uunpklo z4.d, z4.s
 ; CHECK-NEXT:    movprfx z5, z3
 ; CHECK-NEXT:    ext z5.b, z5.b, z3.b, #8
-; CHECK-NEXT:    uunpklo z3.d, z3.s
-; CHECK-NEXT:    uunpklo z4.d, z4.s
 ; CHECK-NEXT:    movprfx z6, z2
 ; CHECK-NEXT:    ext z6.b, z6.b, z2.b, #8
+; CHECK-NEXT:    uunpklo z3.d, z3.s
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
 ; CHECK-NEXT:    uunpklo z5.d, z5.s
 ; CHECK-NEXT:    uunpklo z6.d, z6.s
@@ -2186,9 +2228,9 @@ define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-NEXT:    movprfx z0, z1
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z1.d, z1.s
-; CHECK-NEXT:    stp q3, q5, [x0, #64]
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    stp q2, q6, [x0, #32]
+; CHECK-NEXT:    stp q3, q5, [x0, #64]
 ; CHECK-NEXT:    stp q1, q0, [x0, #96]
 ; CHECK-NEXT:    ret
 ;
@@ -2513,7 +2555,6 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
 define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
@@ -2647,7 +2688,6 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
 define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 ; CHECK-LABEL: zext_v4i16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
@@ -2681,7 +2721,6 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i16_v8i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
@@ -2691,8 +2730,8 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    movprfx z3, z1
 ; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
-; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    uunpklo z3.d, z3.s
 ; CHECK-NEXT:    stp q0, q2, [x0]
 ; CHECK-NEXT:    stp q1, q3, [x0, #32]
@@ -2862,7 +2901,6 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
 define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
 ; CHECK-LABEL: zext_v4i32_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z1, z0
 ; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
@@ -2952,7 +2990,6 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    mov z1.s, w0
 ; SVE-NEXT:    ptrue p0.d, vl2
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT:    uunpklo z1.d, z1.s
 ; SVE-NEXT:    mul z0.d, p0/m, z0.d, z1.d
 ; SVE-NEXT:    str q0, [x1]
@@ -2961,7 +2998,6 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; SVE2-LABEL: extend_and_mul:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z1.s, w0
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE2-NEXT:    uunpklo z1.d, z1.s
 ; SVE2-NEXT:    mul z0.d, z1.d, z0.d
 ; SVE2-NEXT:    str q0, [x1]
@@ -2973,6 +3009,7 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
 ; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
 ; NONEON-NOSVE-NEXT:    mov w9, w0
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    mul x10, x9, x8
 ; NONEON-NOSVE-NEXT:    ldr x8, [sp]
 ; NONEON-NOSVE-NEXT:    mul x8, x9, x8
@@ -2993,6 +3030,7 @@ define void @extend_no_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; CHECK-LABEL: extend_no_mul:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    mov z0.d, x8
 ; CHECK-NEXT:    str q0, [x1]
 ; CHECK-NEXT:    ret
@@ -3000,6 +3038,7 @@ define void @extend_no_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; NONEON-NOSVE-LABEL: extend_no_mul:
 ; NONEON-NOSVE:       // %bb.0: // %entry
 ; NONEON-NOSVE-NEXT:    mov w8, w0
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    stp x8, x8, [sp, #-16]!
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
index 687dd9445f387..3a6981118be2a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -12,10 +12,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: and_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v8i8:
@@ -65,10 +62,7 @@ define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @and_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: and_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v16i8:
@@ -306,10 +300,7 @@ define void @and_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: and_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v4i16:
@@ -343,10 +334,7 @@ define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @and_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: and_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v8i16:
@@ -488,10 +476,7 @@ define void @and_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: and_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v2i32:
@@ -516,10 +501,7 @@ define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @and_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: and_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v4i32:
@@ -607,10 +589,7 @@ define void @and_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: and_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v1i64:
@@ -631,10 +610,7 @@ define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @and_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: and_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v2i64:
@@ -705,10 +681,7 @@ define void @and_v4i64(ptr %a, ptr %b) {
 define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: or_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v8i8:
@@ -758,10 +731,7 @@ define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @or_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: or_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v16i8:
@@ -999,10 +969,7 @@ define void @or_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: or_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v4i16:
@@ -1036,10 +1003,7 @@ define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @or_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: or_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v8i16:
@@ -1181,10 +1145,7 @@ define void @or_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: or_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v2i32:
@@ -1209,10 +1170,7 @@ define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @or_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: or_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v4i32:
@@ -1300,10 +1258,7 @@ define void @or_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: or_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v1i64:
@@ -1324,10 +1279,7 @@ define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @or_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: or_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v2i64:
@@ -1398,10 +1350,7 @@ define void @or_v4i64(ptr %a, ptr %b) {
 define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: xor_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v8i8:
@@ -1451,10 +1400,7 @@ define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @xor_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: xor_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v16i8:
@@ -1692,10 +1638,7 @@ define void @xor_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: xor_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v4i16:
@@ -1729,10 +1672,7 @@ define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @xor_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: xor_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v8i16:
@@ -1874,10 +1814,7 @@ define void @xor_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: xor_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v2i32:
@@ -1902,10 +1839,7 @@ define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @xor_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: xor_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v4i32:
@@ -1993,10 +1927,7 @@ define void @xor_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: xor_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v1i64:
@@ -2017,10 +1948,7 @@ define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @xor_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: xor_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
index d858d8171926e..1ff2ab9b249b4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -13,10 +13,7 @@ define <8 x i8> @smax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: smax_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v8i8:
@@ -75,10 +72,7 @@ define <16 x i8> @smax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: smax_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v16i8:
@@ -366,10 +360,7 @@ define <4 x i16> @smax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: smax_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v4i16:
@@ -408,10 +399,7 @@ define <8 x i16> @smax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: smax_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v8i16:
@@ -579,10 +567,7 @@ define <2 x i32> @smax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: smax_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v2i32:
@@ -609,10 +594,7 @@ define <4 x i32> @smax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: smax_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v4i32:
@@ -709,10 +691,7 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: smax_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v1i64:
@@ -736,10 +715,7 @@ define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: smax_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v2i64:
@@ -815,10 +791,7 @@ define <8 x i8> @smin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: smin_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v8i8:
@@ -877,10 +850,7 @@ define <16 x i8> @smin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: smin_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v16i8:
@@ -1168,10 +1138,7 @@ define <4 x i16> @smin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: smin_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v4i16:
@@ -1210,10 +1177,7 @@ define <8 x i16> @smin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: smin_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v8i16:
@@ -1381,10 +1345,7 @@ define <2 x i32> @smin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: smin_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v2i32:
@@ -1411,10 +1372,7 @@ define <4 x i32> @smin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: smin_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v4i32:
@@ -1511,10 +1469,7 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: smin_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v1i64:
@@ -1538,10 +1493,7 @@ define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: smin_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v2i64:
@@ -1617,10 +1569,7 @@ define <8 x i8> @umax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: umax_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v8i8:
@@ -1679,10 +1628,7 @@ define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: umax_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v16i8:
@@ -1970,10 +1916,7 @@ define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: umax_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v4i16:
@@ -2012,10 +1955,7 @@ define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: umax_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v8i16:
@@ -2183,10 +2123,7 @@ define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: umax_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v2i32:
@@ -2213,10 +2150,7 @@ define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: umax_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v4i32:
@@ -2313,10 +2247,7 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: umax_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v1i64:
@@ -2340,10 +2271,7 @@ define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: umax_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v2i64:
@@ -2419,10 +2347,7 @@ define <8 x i8> @umin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: umin_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v8i8:
@@ -2481,10 +2406,7 @@ define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: umin_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v16i8:
@@ -2772,10 +2694,7 @@ define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: umin_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v4i16:
@@ -2814,10 +2733,7 @@ define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: umin_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v8i16:
@@ -2985,10 +2901,7 @@ define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: umin_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v2i32:
@@ -3015,10 +2928,7 @@ define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: umin_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v4i32:
@@ -3115,10 +3025,7 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: umin_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v1i64:
@@ -3142,10 +3049,7 @@ define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: umin_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
index 319fa5c845827..7c6b3e058d09e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefix=SVE2
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefix=SVE2
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefix=SVE2
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefix=SVE2
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -12,10 +12,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE2-LABEL: mul_v2i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
index f0d7f7df3db70..af1fb3576e110 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=SVE
-; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=SVE2
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=SVE2
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=SVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s --check-prefixes=SVE2
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s --check-prefixes=SVE2
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 ; This test only tests the legal types for a given vector width, as mulh nodes
 ; do not get generated for non-legal types.
@@ -17,25 +17,19 @@ define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; SVE-LABEL: smulh_v4i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT:    sxtb z0.h, p0/m, z0.h
 ; SVE-NEXT:    sxtb z1.h, p0/m, z1.h
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
 ; SVE-NEXT:    lsr z0.h, z0.h, #4
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v4i8:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    ptrue p0.h, vl4
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    sxtb z0.h, p0/m, z0.h
 ; SVE2-NEXT:    sxtb z1.h, p0/m, z1.h
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
 ; SVE2-NEXT:    lsr z0.h, z0.h, #4
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v4i8:
@@ -78,18 +72,12 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; SVE-LABEL: smulh_v8i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl8
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v8i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v8i8:
@@ -152,18 +140,12 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; SVE-LABEL: smulh_v16i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl16
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v16i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v16i8:
@@ -570,25 +552,19 @@ define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; SVE-LABEL: smulh_v2i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT:    sxth z0.s, p0/m, z0.s
 ; SVE-NEXT:    sxth z1.s, p0/m, z1.s
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
 ; SVE-NEXT:    lsr z0.s, z0.s, #16
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v2i16:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    ptrue p0.s, vl2
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    sxth z0.s, p0/m, z0.s
 ; SVE2-NEXT:    sxth z1.s, p0/m, z1.s
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
 ; SVE2-NEXT:    lsr z0.s, z0.s, #16
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v2i16:
@@ -620,18 +596,12 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; SVE-LABEL: smulh_v4i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v4i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v4i16:
@@ -674,18 +644,12 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; SVE-LABEL: smulh_v8i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl8
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v8i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v8i16:
@@ -904,18 +868,12 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; SVE-LABEL: smulh_v2i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v2i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v2i32:
@@ -945,18 +903,12 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; SVE-LABEL: smulh_v4i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl4
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v4i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v4i32:
@@ -1076,18 +1028,12 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; SVE-LABEL: smulh_v1i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v1i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v1i64:
@@ -1113,18 +1059,12 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE-LABEL: smulh_v2i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl2
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v2i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v2i64:
@@ -1213,25 +1153,19 @@ define void @smulh_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; SVE-LABEL: umulh_v4i8:
 ; SVE:       // %bb.0:
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    ptrue p0.h, vl4
 ; SVE-NEXT:    and z0.h, z0.h, #0xff
 ; SVE-NEXT:    and z1.h, z1.h, #0xff
+; SVE-NEXT:    ptrue p0.h, vl4
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
 ; SVE-NEXT:    lsr z0.h, z0.h, #4
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v4i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    and z0.h, z0.h, #0xff
 ; SVE2-NEXT:    and z1.h, z1.h, #0xff
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
 ; SVE2-NEXT:    lsr z0.h, z0.h, #4
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v4i8:
@@ -1274,18 +1208,12 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; SVE-LABEL: umulh_v8i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl8
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v8i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v8i8:
@@ -1348,18 +1276,12 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; SVE-LABEL: umulh_v16i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl16
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v16i8:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.b, z0.b, z1.b
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v16i8:
@@ -1765,25 +1687,19 @@ define void @umulh_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; SVE-LABEL: umulh_v2i16:
 ; SVE:       // %bb.0:
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    ptrue p0.s, vl2
 ; SVE-NEXT:    and z0.s, z0.s, #0xffff
 ; SVE-NEXT:    and z1.s, z1.s, #0xffff
+; SVE-NEXT:    ptrue p0.s, vl2
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
 ; SVE-NEXT:    lsr z0.s, z0.s, #16
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v2i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    and z0.s, z0.s, #0xffff
 ; SVE2-NEXT:    and z1.s, z1.s, #0xffff
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
 ; SVE2-NEXT:    lsr z0.s, z0.s, #16
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v2i16:
@@ -1815,18 +1731,12 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; SVE-LABEL: umulh_v4i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v4i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v4i16:
@@ -1869,18 +1779,12 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; SVE-LABEL: umulh_v8i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl8
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v8i16:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.h, z0.h, z1.h
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v8i16:
@@ -2099,18 +2003,12 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; SVE-LABEL: umulh_v2i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v2i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v2i32:
@@ -2118,10 +2016,14 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldp w11, w10, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #8]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
+; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    umull x9, w9, w10
-; NONEON-NOSVE-NEXT:    umull x8, w8, w11
+; NONEON-NOSVE-NEXT:    mov w10, w11
+; NONEON-NOSVE-NEXT:    umull x8, w8, w10
 ; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
 ; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
@@ -2140,18 +2042,12 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; SVE-LABEL: umulh_v4i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl4
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v4i32:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.s, z0.s, z1.s
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v4i32:
@@ -2162,19 +2058,27 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #32]
+; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
 ; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
+; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    ldp w13, w12, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x12 killed $w12
 ; NONEON-NOSVE-NEXT:    umull x11, w11, w12
+; NONEON-NOSVE-NEXT:    // kill: def $x13 killed $w13
 ; NONEON-NOSVE-NEXT:    ldp w14, w12, [sp, #56]
+; NONEON-NOSVE-NEXT:    // kill: def $x12 killed $w12
+; NONEON-NOSVE-NEXT:    umull x9, w9, w12
 ; NONEON-NOSVE-NEXT:    umull x10, w10, w13
 ; NONEON-NOSVE-NEXT:    lsr x11, x11, #32
-; NONEON-NOSVE-NEXT:    umull x9, w9, w12
-; NONEON-NOSVE-NEXT:    umull x8, w8, w14
-; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
+; NONEON-NOSVE-NEXT:    mov w12, w14
+; NONEON-NOSVE-NEXT:    umull x8, w8, w12
 ; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
-; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #72]
+; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
+; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #72]
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #80
@@ -2224,30 +2128,46 @@ define void @umulh_v8i32(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldp w14, w15, [sp, #96]
+; NONEON-NOSVE-NEXT:    // kill: def $x14 killed $w14
+; NONEON-NOSVE-NEXT:    // kill: def $x15 killed $w15
 ; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #104]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #64]
+; NONEON-NOSVE-NEXT:    // kill: def $x13 killed $w13
+; NONEON-NOSVE-NEXT:    // kill: def $x12 killed $w12
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
+; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
+; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
 ; NONEON-NOSVE-NEXT:    ldp w17, w16, [sp, #112]
+; NONEON-NOSVE-NEXT:    // kill: def $x16 killed $w16
+; NONEON-NOSVE-NEXT:    // kill: def $x17 killed $w17
 ; NONEON-NOSVE-NEXT:    umull x15, w15, w16
-; NONEON-NOSVE-NEXT:    ldp w18, w16, [sp, #120]
+; NONEON-NOSVE-NEXT:    ldp w16, w2, [sp, #88]
 ; NONEON-NOSVE-NEXT:    umull x14, w14, w17
-; NONEON-NOSVE-NEXT:    ldp w17, w1, [sp, #80]
-; NONEON-NOSVE-NEXT:    umull x13, w13, w16
+; NONEON-NOSVE-NEXT:    // kill: def $x16 killed $w16
+; NONEON-NOSVE-NEXT:    ldp w17, w3, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp w18, w1, [sp, #120]
+; NONEON-NOSVE-NEXT:    // kill: def $x1 killed $w1
+; NONEON-NOSVE-NEXT:    // kill: def $x18 killed $w18
+; NONEON-NOSVE-NEXT:    umull x13, w13, w1
+; NONEON-NOSVE-NEXT:    // kill: def $x17 killed $w17
 ; NONEON-NOSVE-NEXT:    lsr x15, x15, #32
-; NONEON-NOSVE-NEXT:    umull x12, w12, w18
 ; NONEON-NOSVE-NEXT:    lsr x14, x14, #32
-; NONEON-NOSVE-NEXT:    ldp w16, w18, [sp, #88]
+; NONEON-NOSVE-NEXT:    umull x12, w12, w18
+; NONEON-NOSVE-NEXT:    mov w18, w2
+; NONEON-NOSVE-NEXT:    mov w1, w3
 ; NONEON-NOSVE-NEXT:    umull x11, w11, w1
-; NONEON-NOSVE-NEXT:    lsr x13, x13, #32
 ; NONEON-NOSVE-NEXT:    stp w14, w15, [sp, #152]
 ; NONEON-NOSVE-NEXT:    umull x10, w10, w17
-; NONEON-NOSVE-NEXT:    lsr x12, x12, #32
+; NONEON-NOSVE-NEXT:    lsr x13, x13, #32
 ; NONEON-NOSVE-NEXT:    umull x9, w9, w18
+; NONEON-NOSVE-NEXT:    lsr x12, x12, #32
 ; NONEON-NOSVE-NEXT:    umull x8, w8, w16
 ; NONEON-NOSVE-NEXT:    lsr x11, x11, #32
-; NONEON-NOSVE-NEXT:    stp w12, w13, [sp, #144]
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
+; NONEON-NOSVE-NEXT:    stp w12, w13, [sp, #144]
 ; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
 ; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
 ; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #136]
@@ -2271,18 +2191,12 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; SVE-LABEL: umulh_v1i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl1
-; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v1i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
-; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v1i64:
@@ -2308,18 +2222,12 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE-LABEL: umulh_v2i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl2
-; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
-; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v2i64:
 ; SVE2:       // %bb.0:
-; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
-; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.d, z0.d, z1.d
-; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
index 2678324728d0e..65cbec9cc8d09 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -13,7 +13,6 @@ define i8 @uaddv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: uaddv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -48,7 +47,6 @@ define i8 @uaddv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: uaddv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -183,7 +181,6 @@ define i16 @uaddv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: uaddv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -210,7 +207,6 @@ define i16 @uaddv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: uaddv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -297,7 +293,6 @@ define i32 @uaddv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: uaddv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -319,7 +314,6 @@ define i32 @uaddv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: uaddv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -375,7 +369,6 @@ define i64 @uaddv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: uaddv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -425,7 +418,6 @@ define i8 @smaxv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: smaxv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    smaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -467,7 +459,6 @@ define i8 @smaxv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: smaxv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -648,7 +639,6 @@ define i16 @smaxv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: smaxv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    smaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -678,7 +668,6 @@ define i16 @smaxv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: smaxv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -787,7 +776,6 @@ define i32 @smaxv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: smaxv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    smaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -810,7 +798,6 @@ define i32 @smaxv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: smaxv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -878,7 +865,6 @@ define i64 @smaxv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: smaxv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -933,7 +919,6 @@ define i8 @sminv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: sminv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -975,7 +960,6 @@ define i8 @sminv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: sminv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1156,7 +1140,6 @@ define i16 @sminv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: sminv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1186,7 +1169,6 @@ define i16 @sminv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: sminv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1295,7 +1277,6 @@ define i32 @sminv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: sminv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1318,7 +1299,6 @@ define i32 @sminv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: sminv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1386,7 +1366,6 @@ define i64 @sminv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: sminv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1441,7 +1420,6 @@ define i8 @umaxv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: umaxv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1483,7 +1461,6 @@ define i8 @umaxv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: umaxv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1664,7 +1641,6 @@ define i16 @umaxv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: umaxv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    umaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1694,7 +1670,6 @@ define i16 @umaxv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: umaxv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1803,7 +1778,6 @@ define i32 @umaxv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: umaxv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    umaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1826,7 +1800,6 @@ define i32 @umaxv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: umaxv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1894,7 +1867,6 @@ define i64 @umaxv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: umaxv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1949,7 +1921,6 @@ define i8 @uminv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: uminv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1991,7 +1962,6 @@ define i8 @uminv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: uminv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2172,7 +2142,6 @@ define i16 @uminv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: uminv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2202,7 +2171,6 @@ define i16 @uminv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: uminv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2311,7 +2279,6 @@ define i32 @uminv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: uminv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2334,7 +2301,6 @@ define i32 @uminv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: uminv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2402,7 +2368,6 @@ define i64 @uminv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: uminv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index c4b6c0e6e924c..48da301dbc37c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -13,8 +13,6 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: srem_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p1.s, vl4
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
@@ -23,7 +21,6 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-NEXT:    sdivr z2.s, p1/m, z2.s, z3.s
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v4i8:
@@ -61,8 +58,6 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: srem_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z2.h, z1.b
 ; CHECK-NEXT:    sunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -81,7 +76,6 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v8i8:
@@ -139,8 +133,6 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: srem_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.h, z1.b
 ; CHECK-NEXT:    sunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -161,25 +153,24 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    movprfx z3, z1
 ; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z3.h, z3.b
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
 ; CHECK-NEXT:    sunpklo z6.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z7.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
+; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z5.b, z3.b, z3.b
-; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v16i8:
@@ -306,8 +297,10 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q2, [x0]
 ; CHECK-NEXT:    ldr q3, [x1]
 ; CHECK-NEXT:    sunpklo z16.s, z16.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
 ; CHECK-NEXT:    sunpklo z17.h, z3.b
 ; CHECK-NEXT:    sunpklo z18.h, z2.b
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
 ; CHECK-NEXT:    sunpklo z19.s, z17.h
 ; CHECK-NEXT:    sunpklo z20.s, z18.h
@@ -315,10 +308,12 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    sunpklo z17.s, z17.h
 ; CHECK-NEXT:    sunpklo z18.s, z18.h
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
 ; CHECK-NEXT:    sdivr z19.s, p0/m, z19.s, z20.s
 ; CHECK-NEXT:    movprfx z20, z2
 ; CHECK-NEXT:    ext z20.b, z20.b, z2.b, #8
 ; CHECK-NEXT:    sunpklo z20.h, z20.b
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
 ; CHECK-NEXT:    sunpklo z22.s, z20.h
 ; CHECK-NEXT:    ext z20.b, z20.b, z20.b, #8
 ; CHECK-NEXT:    sunpklo z20.s, z20.h
@@ -326,34 +321,30 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    movprfx z18, z3
 ; CHECK-NEXT:    ext z18.b, z18.b, z3.b, #8
 ; CHECK-NEXT:    sunpklo z18.h, z18.b
+; CHECK-NEXT:    uzp1 z16.h, z19.h, z19.h
 ; CHECK-NEXT:    sunpklo z21.s, z18.h
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    sunpklo z18.s, z18.h
 ; CHECK-NEXT:    sdivr z21.s, p0/m, z21.s, z22.s
-; CHECK-NEXT:    uzp1 z22.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
 ; CHECK-NEXT:    sdivr z18.s, p0/m, z18.s, z20.s
-; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
-; CHECK-NEXT:    uzp1 z16.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
-; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
-; CHECK-NEXT:    uzp1 z4.h, z21.h, z21.h
-; CHECK-NEXT:    splice z6.h, p0, { z19.h, z20.h }
-; CHECK-NEXT:    uzp1 z5.h, z18.h, z18.h
+; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z5.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z16.b, z6.b, z6.b
-; CHECK-NEXT:    uzp1 z6.b, z7.b, z7.b
+; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z19.h, z21.h, z21.h
+; CHECK-NEXT:    uzp1 z16.b, z16.b, z16.b
+; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z20.h, z18.h, z18.h
+; CHECK-NEXT:    splice z5.h, p0, { z19.h, z20.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z17.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
-; CHECK-NEXT:    splice z4.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    splice z5.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    uzp1 z17.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z5.b, z6.b, z6.b
+; CHECK-NEXT:    splice z6.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    splice z4.b, p0, { z4.b, z5.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    mls z2.b, p0/m, z4.b, z3.b
-; CHECK-NEXT:    mls z0.b, p0/m, z5.b, z1.b
+; CHECK-NEXT:    mls z2.b, p0/m, z6.b, z3.b
+; CHECK-NEXT:    mls z0.b, p0/m, z4.b, z1.b
 ; CHECK-NEXT:    stp q2, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -539,8 +530,6 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: srem_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -548,7 +537,6 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v4i16:
@@ -586,8 +574,6 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: srem_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    movprfx z4, z0
@@ -600,12 +586,11 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v8i16:
@@ -685,14 +670,14 @@ define void @srem_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    movprfx z7, z1
 ; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z7.s, z7.h
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    splice z2.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
+; CHECK-NEXT:    splice z5.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
@@ -802,12 +787,9 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: srem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v2i32:
@@ -835,12 +817,9 @@ define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: srem_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v4i32:
@@ -945,12 +924,9 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: srem_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v1i64:
@@ -973,12 +949,9 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: srem_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v2i64:
@@ -1060,18 +1033,15 @@ define void @srem_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: urem_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v4i8:
@@ -1109,8 +1079,6 @@ define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: urem_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z2.h, z1.b
 ; CHECK-NEXT:    uunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1129,7 +1097,6 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v8i8:
@@ -1187,8 +1154,6 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: urem_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.h, z1.b
 ; CHECK-NEXT:    uunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1209,25 +1174,24 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    movprfx z3, z1
 ; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z3.h, z3.b
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
 ; CHECK-NEXT:    uunpklo z6.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z7.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
+; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z5.b, z3.b, z3.b
-; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
+; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v16i8:
@@ -1354,8 +1318,10 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q2, [x0]
 ; CHECK-NEXT:    ldr q3, [x1]
 ; CHECK-NEXT:    uunpklo z16.s, z16.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
 ; CHECK-NEXT:    uunpklo z17.h, z3.b
 ; CHECK-NEXT:    uunpklo z18.h, z2.b
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
 ; CHECK-NEXT:    uunpklo z19.s, z17.h
 ; CHECK-NEXT:    uunpklo z20.s, z18.h
@@ -1363,10 +1329,12 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    uunpklo z17.s, z17.h
 ; CHECK-NEXT:    uunpklo z18.s, z18.h
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
 ; CHECK-NEXT:    udivr z19.s, p0/m, z19.s, z20.s
 ; CHECK-NEXT:    movprfx z20, z2
 ; CHECK-NEXT:    ext z20.b, z20.b, z2.b, #8
 ; CHECK-NEXT:    uunpklo z20.h, z20.b
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
 ; CHECK-NEXT:    uunpklo z22.s, z20.h
 ; CHECK-NEXT:    ext z20.b, z20.b, z20.b, #8
 ; CHECK-NEXT:    uunpklo z20.s, z20.h
@@ -1374,34 +1342,30 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    movprfx z18, z3
 ; CHECK-NEXT:    ext z18.b, z18.b, z3.b, #8
 ; CHECK-NEXT:    uunpklo z18.h, z18.b
+; CHECK-NEXT:    uzp1 z16.h, z19.h, z19.h
 ; CHECK-NEXT:    uunpklo z21.s, z18.h
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    uunpklo z18.s, z18.h
 ; CHECK-NEXT:    udivr z21.s, p0/m, z21.s, z22.s
-; CHECK-NEXT:    uzp1 z22.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
 ; CHECK-NEXT:    udivr z18.s, p0/m, z18.s, z20.s
-; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
-; CHECK-NEXT:    uzp1 z16.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
-; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
-; CHECK-NEXT:    uzp1 z4.h, z21.h, z21.h
-; CHECK-NEXT:    splice z6.h, p0, { z19.h, z20.h }
-; CHECK-NEXT:    uzp1 z5.h, z18.h, z18.h
+; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z5.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z16.b, z6.b, z6.b
-; CHECK-NEXT:    uzp1 z6.b, z7.b, z7.b
+; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z19.h, z21.h, z21.h
+; CHECK-NEXT:    uzp1 z16.b, z16.b, z16.b
+; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z20.h, z18.h, z18.h
+; CHECK-NEXT:    splice z5.h, p0, { z19.h, z20.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z17.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
-; CHECK-NEXT:    splice z4.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    splice z5.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    uzp1 z17.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z5.b, z6.b, z6.b
+; CHECK-NEXT:    splice z6.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    splice z4.b, p0, { z4.b, z5.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    mls z2.b, p0/m, z4.b, z3.b
-; CHECK-NEXT:    mls z0.b, p0/m, z5.b, z1.b
+; CHECK-NEXT:    mls z2.b, p0/m, z6.b, z3.b
+; CHECK-NEXT:    mls z0.b, p0/m, z4.b, z1.b
 ; CHECK-NEXT:    stp q2, q0, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -1587,8 +1551,6 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: urem_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1596,7 +1558,6 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v4i16:
@@ -1634,8 +1595,6 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: urem_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    movprfx z4, z0
@@ -1648,12 +1607,11 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z4.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v8i16:
@@ -1733,14 +1691,14 @@ define void @urem_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    movprfx z7, z1
 ; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z7.s, z7.h
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
-; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    splice z2.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
+; CHECK-NEXT:    splice z5.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
@@ -1850,12 +1808,9 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: urem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v2i32:
@@ -1883,12 +1838,9 @@ define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: urem_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v4i32:
@@ -1993,12 +1945,9 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: urem_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v1i64:
@@ -2021,12 +1970,9 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: urem_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
index 4ac156c42fda0..e91d78a75cb85 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -10,12 +10,9 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i8:
@@ -52,11 +49,8 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.b, w0
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i8:
@@ -109,11 +103,8 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.b, w0
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v16i8:
@@ -363,12 +354,9 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i16:
@@ -395,12 +383,9 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i16:
@@ -437,12 +422,9 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i16:
@@ -597,12 +579,9 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i32:
@@ -629,12 +608,9 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i32:
@@ -727,15 +703,11 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) {
 define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) {
 ; CHECK-LABEL: select_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v1i64:
@@ -757,15 +729,11 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) {
 define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) {
 ; CHECK-LABEL: select_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i64:
@@ -789,7 +757,6 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) {
 define void @select_v4i64(ptr %a, ptr %b, i1 %mask) {
 ; CHECK-LABEL: select_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
index 4cf8945575ded..cf3e10a2217ae 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -13,12 +13,9 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: ashr_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v4i8:
@@ -29,16 +26,20 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
 ; NONEON-NOSVE-NEXT:    ldrsb w13, [sp, #10]
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrsb w14, [sp, #8]
 ; NONEON-NOSVE-NEXT:    asr w10, w11, w10
-; NONEON-NOSVE-NEXT:    asr w11, w13, w12
+; NONEON-NOSVE-NEXT:    mov w11, w12
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    asr w8, w14, w9
+; NONEON-NOSVE-NEXT:    mov w8, w9
+; NONEON-NOSVE-NEXT:    asr w11, w13, w11
+; NONEON-NOSVE-NEXT:    asr w8, w14, w8
 ; NONEON-NOSVE-NEXT:    strh w10, [sp, #28]
 ; NONEON-NOSVE-NEXT:    strh w11, [sp, #26]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
@@ -53,10 +54,7 @@ define <8 x i8> @ashr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: ashr_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v8i8:
@@ -66,34 +64,42 @@ define <8 x i8> @ashr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -107,10 +113,7 @@ define <16 x i8> @ashr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: ashr_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v16i8:
@@ -119,66 +122,82 @@ define <16 x i8> @ashr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -209,130 +228,162 @@ define void @ashr_v32i8(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #63]
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #47]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #46]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #95]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #62]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #45]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #61]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #93]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #60]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #43]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #59]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #91]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #41]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #57]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #89]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #39]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #55]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #87]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #37]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #53]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #85]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #35]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #51]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #83]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #33]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #49]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #81]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #79]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #77]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #75]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #73]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #71]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #69]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #67]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #65]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -350,12 +401,9 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: ashr_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v2i16:
@@ -367,8 +415,10 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrsh w11, [sp, #8]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
-; NONEON-NOSVE-NEXT:    asr w9, w11, w10
+; NONEON-NOSVE-NEXT:    mov w9, w10
+; NONEON-NOSVE-NEXT:    asr w9, w11, w9
 ; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #32
@@ -381,10 +431,7 @@ define <4 x i16> @ashr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: ashr_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v4i16:
@@ -394,18 +441,22 @@ define <4 x i16> @ashr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -419,10 +470,7 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: ashr_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v8i16:
@@ -431,34 +479,42 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -489,66 +545,82 @@ define void @ashr_v16i16(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #46]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -566,10 +638,7 @@ define <2 x i32> @ashr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: ashr_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v2i32:
@@ -579,8 +648,10 @@ define <2 x i32> @ashr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -594,10 +665,7 @@ define <4 x i32> @ashr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: ashr_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v4i32:
@@ -606,14 +674,18 @@ define <4 x i32> @ashr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -644,26 +716,34 @@ define void @ashr_v8i32(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -681,10 +761,7 @@ define <1 x i64> @ashr_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: ashr_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v1i64:
@@ -706,10 +783,7 @@ define <2 x i64> @ashr_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: ashr_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v2i64:
@@ -778,13 +852,10 @@ define void @ashr_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: lshr_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v4i8:
@@ -795,16 +866,20 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
 ; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #10]
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #8]
 ; NONEON-NOSVE-NEXT:    lsr w10, w11, w10
-; NONEON-NOSVE-NEXT:    lsr w11, w13, w12
+; NONEON-NOSVE-NEXT:    mov w11, w12
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    lsr w8, w14, w9
+; NONEON-NOSVE-NEXT:    mov w8, w9
+; NONEON-NOSVE-NEXT:    lsr w11, w13, w11
+; NONEON-NOSVE-NEXT:    lsr w8, w14, w8
 ; NONEON-NOSVE-NEXT:    strh w10, [sp, #28]
 ; NONEON-NOSVE-NEXT:    strh w11, [sp, #26]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
@@ -819,10 +894,7 @@ define <8 x i8> @lshr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: lshr_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v8i8:
@@ -832,34 +904,42 @@ define <8 x i8> @lshr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -873,10 +953,7 @@ define <16 x i8> @lshr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: lshr_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v16i8:
@@ -885,66 +962,82 @@ define <16 x i8> @lshr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -975,130 +1068,162 @@ define void @lshr_v32i8(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #63]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #47]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #95]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #62]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #45]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #61]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #93]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #60]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #43]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #59]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #91]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #41]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #57]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #89]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #39]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #55]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #87]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #37]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #53]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #85]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #35]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #51]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #83]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #33]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #49]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #81]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #79]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #77]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #75]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #73]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #71]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #69]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #67]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #65]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -1115,13 +1240,10 @@ define void @lshr_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: lshr_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v2i16:
@@ -1133,8 +1255,10 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #8]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
-; NONEON-NOSVE-NEXT:    lsr w9, w11, w10
+; NONEON-NOSVE-NEXT:    mov w9, w10
+; NONEON-NOSVE-NEXT:    lsr w9, w11, w9
 ; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #32
@@ -1147,10 +1271,7 @@ define <4 x i16> @lshr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: lshr_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v4i16:
@@ -1160,18 +1281,22 @@ define <4 x i16> @lshr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -1185,10 +1310,7 @@ define <8 x i16> @lshr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: lshr_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v8i16:
@@ -1197,34 +1319,42 @@ define <8 x i16> @lshr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -1255,66 +1385,82 @@ define void @lshr_v16i16(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #46]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -1332,10 +1478,7 @@ define <2 x i32> @lshr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: lshr_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v2i32:
@@ -1345,8 +1488,10 @@ define <2 x i32> @lshr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -1360,10 +1505,7 @@ define <4 x i32> @lshr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: lshr_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v4i32:
@@ -1372,14 +1514,18 @@ define <4 x i32> @lshr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -1410,26 +1556,34 @@ define void @lshr_v8i32(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -1447,10 +1601,7 @@ define <1 x i64> @lshr_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: lshr_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v1i64:
@@ -1472,10 +1623,7 @@ define <2 x i64> @lshr_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: lshr_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v2i64:
@@ -1544,12 +1692,9 @@ define void @lshr_v4i64(ptr %a, ptr %b) {
 define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) {
 ; CHECK-LABEL: shl_v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z1.s, z1.s, #0xff
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v2i8:
@@ -1560,6 +1705,8 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #20]
 ; NONEON-NOSVE-NEXT:    ldr w10, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w9
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
@@ -1574,12 +1721,9 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) {
 define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: shl_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v4i8:
@@ -1590,9 +1734,13 @@ define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #14]
 ; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
+; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #18]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
 ; NONEON-NOSVE-NEXT:    lsl w11, w12, w11
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    strh w11, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #12]
 ; NONEON-NOSVE-NEXT:    lsl w10, w11, w10
@@ -1614,10 +1762,7 @@ define <8 x i8> @shl_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: shl_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v8i8:
@@ -1627,34 +1772,42 @@ define <8 x i8> @shl_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -1668,10 +1821,7 @@ define <16 x i8> @shl_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: shl_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v16i8:
@@ -1680,66 +1830,82 @@ define <16 x i8> @shl_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -1770,130 +1936,162 @@ define void @shl_v32i8(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #63]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #47]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #95]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #62]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #45]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #61]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #93]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #60]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #43]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #59]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #91]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #41]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #57]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #89]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #39]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #55]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #87]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #37]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #53]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #85]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #35]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #51]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #83]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #33]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #49]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #81]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #79]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #77]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #75]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #73]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #71]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #69]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #67]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #65]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -1911,10 +2109,7 @@ define <4 x i16> @shl_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: shl_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v4i16:
@@ -1924,18 +2119,22 @@ define <4 x i16> @shl_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -1949,10 +2148,7 @@ define <8 x i16> @shl_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: shl_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v8i16:
@@ -1961,34 +2157,42 @@ define <8 x i16> @shl_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -2019,66 +2223,82 @@ define void @shl_v16i16(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #46]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -2096,10 +2316,7 @@ define <2 x i32> @shl_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: shl_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v2i32:
@@ -2109,8 +2326,10 @@ define <2 x i32> @shl_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -2124,10 +2343,7 @@ define <4 x i32> @shl_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: shl_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v4i32:
@@ -2136,14 +2352,18 @@ define <4 x i32> @shl_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -2174,26 +2394,34 @@ define void @shl_v8i32(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -2211,10 +2439,7 @@ define <1 x i64> @shl_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: shl_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v1i64:
@@ -2236,10 +2461,7 @@ define <2 x i64> @shl_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: shl_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index e4763f0bb9ba9..c5da8049736b5 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -1,23 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
 define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i8:
@@ -64,16 +60,12 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) {
 define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) {
 ; CHECK-LABEL: select_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.b, z2.b, #7
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    asr z2.b, z2.b, #7
 ; CHECK-NEXT:    and z2.b, z2.b, #0x1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i8:
@@ -148,16 +140,12 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) {
 define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) {
 ; CHECK-LABEL: select_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z2.b, z2.b, #7
+; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    asr z2.b, z2.b, #7
 ; CHECK-NEXT:    and z2.b, z2.b, #0x1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v16i8:
@@ -292,6 +280,7 @@ define void @select_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    ldp q2, q1, [x0]
 ; CHECK-NEXT:    mov w8, #16 // =0x10
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    cmpne p1.b, p0/z, z1.b, z0.b
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, z3.b
 ; CHECK-NEXT:    st1b { z0.b }, p1, [x0, x8]
@@ -510,16 +499,12 @@ define void @select_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i16:
@@ -551,16 +536,12 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) {
 define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i16:
@@ -607,17 +588,13 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) {
 define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) {
 ; CHECK-LABEL: select_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.h, z2.b
+; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i16:
@@ -808,16 +785,12 @@ define void @select_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i32:
@@ -849,17 +822,13 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) {
 define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i32:
@@ -967,15 +936,11 @@ define void @select_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) {
 ; CHECK-LABEL: select_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v1i64:
@@ -997,17 +962,13 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) {
 define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    lsl z2.d, z2.d, #63
 ; CHECK-NEXT:    asr z2.d, z2.d, #63
 ; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i64:
@@ -1016,10 +977,12 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
 ; NONEON-NOSVE-NEXT:    str d2, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #40]
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp]
+; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
+; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #1
 ; NONEON-NOSVE-NEXT:    ldr x10, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr x11, [sp, #8]
-; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #1
 ; NONEON-NOSVE-NEXT:    sbfx x9, x9, #0, #1
 ; NONEON-NOSVE-NEXT:    cmp x8, #0
 ; NONEON-NOSVE-NEXT:    csel x8, x11, x10, ne
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index 302942178a764..d74faa9f4fe09 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -19,8 +19,8 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind {
 ; CHECK-NEXT:    ptrue p0.b, vl2
 ; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x20]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    mov z2.b, z0.b[1]
-; CHECK-NEXT:    zip1 z0.s, z0.s, z2.s
+; CHECK-NEXT:    mov z1.b, z0.b[1]
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
 ; CHECK-NEXT:    st1b { z0.s }, p0, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
@@ -180,9 +180,9 @@ define void @alloc_v8f64(ptr %st_ptr) nounwind {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov x8, #4 // =0x4
 ; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x20]
-; CHECK-NEXT:    ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3]
+; CHECK-NEXT:    ld2d { z1.d, z2.d }, p0/z, [x20, x8, lsl #3]
 ; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Reload
-; CHECK-NEXT:    stp q0, q2, [x19]
+; CHECK-NEXT:    stp q0, q1, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
index daf2734ce7d55..12864895baa50 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -15,7 +15,6 @@ define <4 x i32> @test(ptr %arg1, ptr %arg2) {
 ; CHECK-NEXT:    mov z0.s, z1.s[2]
 ; CHECK-NEXT:    add z1.s, z3.s, z3.s
 ; CHECK-NEXT:    add z3.s, z4.s, z4.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    stp q2, q5, [x0, #32]
 ; CHECK-NEXT:    stp q1, q3, [x0]
 ; CHECK-NEXT:    ret
@@ -107,7 +106,6 @@ define <2 x i32> @test2(ptr %arg1, ptr %arg2) {
 ; CHECK-NEXT:    add z2.s, z3.s, z3.s
 ; CHECK-NEXT:    add z3.s, z4.s, z4.s
 ; CHECK-NEXT:    stp q1, q5, [x0, #32]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    stp q2, q3, [x0]
 ; CHECK-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
index 504db6df18ee5..fde5debe8eda4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -10,7 +10,6 @@ define <4 x i8> @load_v4i8(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_v4i8:
@@ -79,7 +78,6 @@ define <2 x i16> @load_v2i16(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_v2i16:
@@ -101,6 +99,7 @@ define <2 x half> @load_v2f16(ptr %a) {
 ; CHECK-LABEL: load_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_v2f16:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
index d4565c4b69c77..1882a011e8c5f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -14,7 +14,6 @@ define i8 @andv_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: andv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -41,7 +40,6 @@ define i8 @andv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: andv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -76,7 +74,6 @@ define i8 @andv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: andv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -211,7 +208,6 @@ define i16 @andv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: andv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -233,7 +229,6 @@ define i16 @andv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: andv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -260,7 +255,6 @@ define i16 @andv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: andv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -347,7 +341,6 @@ define i32 @andv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: andv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -369,7 +362,6 @@ define i32 @andv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: andv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -425,7 +417,6 @@ define i64 @andv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: andv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -475,7 +466,6 @@ define i8 @eorv_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: eorv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -502,7 +492,6 @@ define i8 @eorv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: eorv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -537,7 +526,6 @@ define i8 @eorv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: eorv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -672,7 +660,6 @@ define i16 @eorv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: eorv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -694,7 +681,6 @@ define i16 @eorv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: eorv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -721,7 +707,6 @@ define i16 @eorv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: eorv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -808,7 +793,6 @@ define i32 @eorv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: eorv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -830,7 +814,6 @@ define i32 @eorv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: eorv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -886,7 +869,6 @@ define i64 @eorv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: eorv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -936,7 +918,6 @@ define i8 @orv_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: orv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -963,7 +944,6 @@ define i8 @orv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: orv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -998,7 +978,6 @@ define i8 @orv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: orv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1133,7 +1112,6 @@ define i16 @orv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: orv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1155,7 +1133,6 @@ define i16 @orv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: orv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1182,7 +1159,6 @@ define i16 @orv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: orv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1269,7 +1245,6 @@ define i32 @orv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: orv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1291,7 +1266,6 @@ define i32 @orv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: orv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1347,7 +1321,6 @@ define i64 @orv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: orv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
index b1ac9469c0573..4d8d76416b856 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming  < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -36,13 +36,13 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    index z2.d, #0, #1
 ; CHECK-NEXT:    mov z1.d, z1.d[1]
+; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    mov z3.d, x8
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
 ; CHECK-NEXT:    ldr x8, [x8]
 ; CHECK-NEXT:    mov z0.d, p0/m, x8
 ; CHECK-NEXT:  .LBB0_4: // %else2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
 ;
@@ -53,6 +53,7 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ; NONEON-NOSVE-NEXT:    ldr q0, [x0]
 ; NONEON-NOSVE-NEXT:    ldr q1, [x1]
 ; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    str q0, [sp, #112]
 ; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #112]
 ; NONEON-NOSVE-NEXT:    cmp x9, #0
@@ -137,6 +138,7 @@ define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ; NONEON-NOSVE-NEXT:    ldr q1, [x0]
 ; NONEON-NOSVE-NEXT:    ldr q0, [x1]
 ; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
+; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    str q1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #64]
 ; NONEON-NOSVE-NEXT:    cmp x9, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
index e9b2f539b30cc..41a9b4b292d0b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -768,6 +768,8 @@ define void @fadd_v2f16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    ldr s0, [x0]
 ; CHECK-NEXT:    ldr s1, [x1]
+; CHECK-NEXT:    // kill: def $d0 killed $s0
+; CHECK-NEXT:    // kill: def $d1 killed $s1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
index 890bc721128ff..499e3c0937dc2 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -480,7 +480,6 @@ define <16 x i8> @test_revv16i8(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ldr q0, [x0]
 ; CHECK-NEXT:    revb z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_revv16i8:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
index 5586de39fb1bf..fc7ffc447a9d0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -28,9 +28,9 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) {
 ; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
+; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w8, s0
@@ -122,40 +122,40 @@ define i1 @ptest_or_v16i1(ptr %a, ptr %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ldp q4, q5, [x1, #32]
 ; CHECK-NEXT:    ldp q2, q3, [x0]
+; CHECK-NEXT:    ldp q4, q5, [x1, #32]
 ; CHECK-NEXT:    fcmne p1.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    fcmne p2.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    ldp q0, q1, [x1]
-; CHECK-NEXT:    fcmne p5.s, p0/z, z5.s, #0.0
 ; CHECK-NEXT:    fcmne p3.s, p0/z, z3.s, #0.0
+; CHECK-NEXT:    fcmne p4.s, p0/z, z2.s, #0.0
+; CHECK-NEXT:    fcmne p5.s, p0/z, z5.s, #0.0
 ; CHECK-NEXT:    fcmne p6.s, p0/z, z4.s, #0.0
 ; CHECK-NEXT:    fcmne p7.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    fcmne p4.s, p0/z, z2.s, #0.0
-; CHECK-NEXT:    mov z2.s, p5/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z3.s, p3/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z3.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z2.s, p5/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z4.s, p6/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z18.h, z3.h, z3.h
 ; CHECK-NEXT:    mov z5.s, p7/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    uzp1 z7.h, z0.h, z0.h
 ; CHECK-NEXT:    mov z0.s, p4/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    uzp1 z18.h, z3.h, z3.h
 ; CHECK-NEXT:    mov z16.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
 ; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z17.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z3.h, z16.h, z16.h
 ; CHECK-NEXT:    splice z0.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z3.h, z16.h, z16.h
 ; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    splice z2.h, p0, { z17.h, z18.h }
 ; CHECK-NEXT:    splice z3.h, p0, { z3.h, z4.h }
-; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
 ; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
 ; CHECK-NEXT:    uzp1 z0.b, z3.b, z3.b
@@ -331,40 +331,40 @@ define i1 @ptest_and_v16i1(ptr %a, ptr %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    ldp q4, q5, [x1, #32]
 ; CHECK-NEXT:    ldp q2, q3, [x0]
+; CHECK-NEXT:    ldp q4, q5, [x1, #32]
 ; CHECK-NEXT:    fcmne p1.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    fcmne p2.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    ldp q0, q1, [x1]
-; CHECK-NEXT:    fcmne p5.s, p0/z, z5.s, #0.0
 ; CHECK-NEXT:    fcmne p3.s, p0/z, z3.s, #0.0
+; CHECK-NEXT:    fcmne p4.s, p0/z, z2.s, #0.0
+; CHECK-NEXT:    fcmne p5.s, p0/z, z5.s, #0.0
 ; CHECK-NEXT:    fcmne p6.s, p0/z, z4.s, #0.0
 ; CHECK-NEXT:    fcmne p7.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    fcmne p4.s, p0/z, z2.s, #0.0
-; CHECK-NEXT:    mov z2.s, p5/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z3.s, p3/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z3.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z2.s, p5/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z4.s, p6/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z18.h, z3.h, z3.h
 ; CHECK-NEXT:    mov z5.s, p7/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    uzp1 z7.h, z0.h, z0.h
 ; CHECK-NEXT:    mov z0.s, p4/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    uzp1 z18.h, z3.h, z3.h
 ; CHECK-NEXT:    mov z16.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
 ; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z17.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z3.h, z16.h, z16.h
 ; CHECK-NEXT:    splice z0.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z3.h, z16.h, z16.h
 ; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    splice z2.h, p0, { z17.h, z18.h }
 ; CHECK-NEXT:    splice z3.h, p0, { z3.h, z4.h }
-; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
 ; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
 ; CHECK-NEXT:    uzp1 z0.b, z3.b, z3.b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
index 7363c306033a1..01d77db4a832a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mattr=+dotprod,+sve < %s | FileCheck %s -check-prefix=DOT
-; RUN: llc -mattr=+dotprod,+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE
-; RUN: llc -mattr=+dotprod,+sme -force-streaming < %s | FileCheck %s --check-prefix=STREAMING-SVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+dotprod,+sve < %s | FileCheck %s -check-prefix=DOT
+; RUN: llc -enable-subreg-liveness -mattr=+dotprod,+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE
+; RUN: llc -enable-subreg-liveness -mattr=+dotprod,+sme -force-streaming < %s | FileCheck %s --check-prefix=STREAMING-SVE
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -36,34 +36,32 @@ define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
 ;
 ; STREAMING-SVE-LABEL: reduce_uaddv_v16i8:
 ; STREAMING-SVE:       // %bb.0:
-; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
-; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; STREAMING-SVE-NEXT:    movprfx z2, z1
 ; STREAMING-SVE-NEXT:    ext z2.b, z2.b, z1.b, #8
 ; STREAMING-SVE-NEXT:    movprfx z3, z0
 ; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
-; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
 ; STREAMING-SVE-NEXT:    uunpklo z1.h, z1.b
 ; STREAMING-SVE-NEXT:    uunpklo z0.h, z0.b
+; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
 ; STREAMING-SVE-NEXT:    uunpklo z2.h, z2.b
 ; STREAMING-SVE-NEXT:    uunpklo z3.h, z3.b
 ; STREAMING-SVE-NEXT:    movprfx z4, z1
 ; STREAMING-SVE-NEXT:    ext z4.b, z4.b, z1.b, #8
+; STREAMING-SVE-NEXT:    uunpklo z1.s, z1.h
 ; STREAMING-SVE-NEXT:    movprfx z7, z0
 ; STREAMING-SVE-NEXT:    ext z7.b, z7.b, z0.b, #8
-; STREAMING-SVE-NEXT:    uunpklo z1.s, z1.h
 ; STREAMING-SVE-NEXT:    uunpklo z0.s, z0.h
 ; STREAMING-SVE-NEXT:    movprfx z5, z2
 ; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z2.b, #8
 ; STREAMING-SVE-NEXT:    movprfx z6, z3
 ; STREAMING-SVE-NEXT:    ext z6.b, z6.b, z3.b, #8
-; STREAMING-SVE-NEXT:    uunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    uunpklo z4.s, z4.h
 ; STREAMING-SVE-NEXT:    uunpklo z7.s, z7.h
+; STREAMING-SVE-NEXT:    uunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    uunpklo z3.s, z3.h
+; STREAMING-SVE-NEXT:    add z0.s, z0.s, z1.s
 ; STREAMING-SVE-NEXT:    uunpklo z5.s, z5.h
 ; STREAMING-SVE-NEXT:    uunpklo z6.s, z6.h
-; STREAMING-SVE-NEXT:    add z0.s, z0.s, z1.s
 ; STREAMING-SVE-NEXT:    add z1.s, z3.s, z2.s
 ; STREAMING-SVE-NEXT:    add z2.s, z7.s, z4.s
 ; STREAMING-SVE-NEXT:    add z3.s, z6.s, z5.s
@@ -108,34 +106,32 @@ define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
 ;
 ; STREAMING-SVE-LABEL: reduce_saddv_v16i8:
 ; STREAMING-SVE:       // %bb.0:
-; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
-; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; STREAMING-SVE-NEXT:    movprfx z2, z1
 ; STREAMING-SVE-NEXT:    ext z2.b, z2.b, z1.b, #8
 ; STREAMING-SVE-NEXT:    movprfx z3, z0
 ; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z0.b, #8
-; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
 ; STREAMING-SVE-NEXT:    sunpklo z1.h, z1.b
 ; STREAMING-SVE-NEXT:    sunpklo z0.h, z0.b
+; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
 ; STREAMING-SVE-NEXT:    sunpklo z2.h, z2.b
 ; STREAMING-SVE-NEXT:    sunpklo z3.h, z3.b
 ; STREAMING-SVE-NEXT:    movprfx z4, z1
 ; STREAMING-SVE-NEXT:    ext z4.b, z4.b, z1.b, #8
+; STREAMING-SVE-NEXT:    sunpklo z1.s, z1.h
 ; STREAMING-SVE-NEXT:    movprfx z7, z0
 ; STREAMING-SVE-NEXT:    ext z7.b, z7.b, z0.b, #8
-; STREAMING-SVE-NEXT:    sunpklo z1.s, z1.h
 ; STREAMING-SVE-NEXT:    sunpklo z0.s, z0.h
 ; STREAMING-SVE-NEXT:    movprfx z5, z2
 ; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z2.b, #8
 ; STREAMING-SVE-NEXT:    movprfx z6, z3
 ; STREAMING-SVE-NEXT:    ext z6.b, z6.b, z3.b, #8
-; STREAMING-SVE-NEXT:    sunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    sunpklo z4.s, z4.h
 ; STREAMING-SVE-NEXT:    sunpklo z7.s, z7.h
+; STREAMING-SVE-NEXT:    sunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    sunpklo z3.s, z3.h
+; STREAMING-SVE-NEXT:    add z0.s, z0.s, z1.s
 ; STREAMING-SVE-NEXT:    sunpklo z5.s, z5.h
 ; STREAMING-SVE-NEXT:    sunpklo z6.s, z6.h
-; STREAMING-SVE-NEXT:    add z0.s, z0.s, z1.s
 ; STREAMING-SVE-NEXT:    add z1.s, z3.s, z2.s
 ; STREAMING-SVE-NEXT:    add z2.s, z7.s, z4.s
 ; STREAMING-SVE-NEXT:    add z3.s, z6.s, z5.s
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
index 8e12b861abf3f..720e0fc77a8c3 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -17,7 +17,6 @@ define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) {
 ; CHECK-NEXT:    zip1 z1.h, z2.h, z1.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %el0 = extractelement <vscale x 4 x i1> %a, i32 0
   %el1 = extractelement <vscale x 4 x i1> %a, i32 1
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index c34cae12516ed..cdee37c66ea81 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -14,10 +14,8 @@ define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: bitreverse_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    lsr z0.h, z0.h, #8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v4i8:
@@ -51,9 +49,7 @@ define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: bitreverse_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v8i8:
@@ -103,9 +99,7 @@ define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: bitreverse_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v16i8:
@@ -340,10 +334,8 @@ define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: bitreverse_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    lsr z0.s, z0.s, #16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v2i16:
@@ -368,9 +360,7 @@ define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: bitreverse_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v4i16:
@@ -404,9 +394,7 @@ define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: bitreverse_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v8i16:
@@ -545,9 +533,7 @@ define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: bitreverse_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v2i32:
@@ -570,9 +556,7 @@ define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: bitreverse_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v4i32:
@@ -645,9 +629,7 @@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: bitreverse_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v1i64:
@@ -668,9 +650,7 @@ define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: bitreverse_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v2i64:
@@ -732,10 +712,8 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: bswap_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.s, p0/m, z0.s
 ; CHECK-NEXT:    lsr z0.s, z0.s, #16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v2i16:
@@ -774,9 +752,7 @@ define <4 x i16> @bswap_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: bswap_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v4i16:
@@ -810,9 +786,7 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: bswap_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    revb z0.h, p0/m, z0.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v8i16:
@@ -954,9 +928,7 @@ define <2 x i32> @bswap_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: bswap_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v2i32:
@@ -990,9 +962,7 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: bswap_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    revb z0.s, p0/m, z0.s
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v4i32:
@@ -1134,9 +1104,7 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: bswap_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v1i64:
@@ -1170,9 +1138,7 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: bswap_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    revb z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
index 85ba964000234..1f6c346ecc90f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -10,10 +10,8 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1) {
 ; CHECK-LABEL: sdiv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i8:
@@ -59,9 +57,7 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i8:
@@ -127,9 +123,7 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v16i8:
@@ -460,10 +454,8 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1) {
 ; CHECK-LABEL: sdiv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i16:
@@ -491,9 +483,7 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i16:
@@ -535,9 +525,7 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i16:
@@ -724,9 +712,7 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i32:
@@ -753,9 +739,7 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i32:
@@ -852,9 +836,7 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v1i64:
@@ -878,9 +860,7 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
index cfe50ad78e7ba..c5e6c05f17807 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -9,9 +9,8 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) {
 ; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    mov z1.d, z0.d
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -38,8 +37,6 @@ define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2
 ; CHECK-LABEL: interleave_store_without_splat:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -75,13 +72,12 @@ define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2)
 ; CHECK-LABEL: interleave_store_legalization:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z2_z3
-; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    mov x8, #8 // =0x8
 ; CHECK-NEXT:    mov z4.d, z0.d
-; CHECK-NEXT:    mov z2.d, z1.d
+; CHECK-NEXT:    mov x8, #8 // =0x8
+; CHECK-NEXT:    mov z2.d, z3.d
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    st2w { z4.s, z5.s }, p0, [x0]
-; CHECK-NEXT:    st2w { z2.s, z3.s }, p0, [x0, x8, lsl #2]
+; CHECK-NEXT:    st2w { z1.s, z2.s }, p0, [x0, x8, lsl #2]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: interleave_store_legalization:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
index 13fcd94ea8a26..ae2b0d238ebd7 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -129,8 +129,8 @@ define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) {
 define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) {
 ; CHECK-LABEL: store_trunc_v2i256i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr d1, [x0, #32]
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    ldr d1, [x0, #32]
 ; CHECK-NEXT:    ldr d0, [x0]
 ; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
 ; CHECK-NEXT:    str q0, [x1]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
index 4a540813f0773..e3f35f86ec92a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -15,10 +15,9 @@ define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z3.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z2.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v16i16_v16i8:
@@ -223,16 +222,16 @@ define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    uzp1 z7.b, z0.b, z0.b
 ; CHECK-NEXT:    uzp1 z6.b, z1.b, z1.b
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
-; CHECK-NEXT:    uzp1 z17.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z16.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z3.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z2.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z3.b, z3.b, z3.b
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z5.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z17.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z16.b, z1.b, z1.b
 ; CHECK-NEXT:    splice z0.b, p0, { z6.b, z7.b }
-; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
-; CHECK-NEXT:    splice z1.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    splice z3.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z3.b, p0, { z16.b, z17.b }
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
 ; CHECK-NEXT:    add z2.b, z2.b, z2.b
@@ -527,49 +526,49 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #192]
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    ldp q16, q17, [x0, #224]
+; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0]
+; CHECK-NEXT:    uzp1 z3.b, z3.b, z3.b
 ; CHECK-NEXT:    ldp q6, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z17.b, z3.b, z3.b
-; CHECK-NEXT:    ldp q3, q18, [x0, #224]
-; CHECK-NEXT:    uzp1 z16.b, z2.b, z2.b
-; CHECK-NEXT:    ldp q2, q19, [x0, #128]
-; CHECK-NEXT:    ldp q0, q1, [x0, #32]
-; CHECK-NEXT:    uzp1 z21.b, z18.b, z18.b
-; CHECK-NEXT:    ldp q18, q22, [x0, #160]
-; CHECK-NEXT:    uzp1 z20.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z24.b, z19.b, z19.b
-; CHECK-NEXT:    ldp q3, q19, [x0, #96]
-; CHECK-NEXT:    uzp1 z23.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z26.b, z22.b, z22.b
-; CHECK-NEXT:    splice z2.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    uzp1 z17.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z25.b, z18.b, z18.b
-; CHECK-NEXT:    splice z7.b, p0, { z20.b, z21.b }
-; CHECK-NEXT:    uzp1 z21.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    ldp q18, q19, [x0, #128]
+; CHECK-NEXT:    uzp1 z17.b, z17.b, z17.b
+; CHECK-NEXT:    ldp q20, q21, [x0, #160]
+; CHECK-NEXT:    uzp1 z16.b, z16.b, z16.b
+; CHECK-NEXT:    ldp q22, q23, [x0, #96]
+; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
 ; CHECK-NEXT:    uzp1 z19.b, z19.b, z19.b
-; CHECK-NEXT:    uzp1 z20.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z5.b, z1.b, z1.b
-; CHECK-NEXT:    uzp1 z16.b, z6.b, z6.b
-; CHECK-NEXT:    splice z6.b, p0, { z23.b, z24.b }
-; CHECK-NEXT:    uzp1 z18.b, z3.b, z3.b
-; CHECK-NEXT:    splice z3.b, p0, { z25.b, z26.b }
-; CHECK-NEXT:    uzp1 z4.b, z0.b, z0.b
-; CHECK-NEXT:    add z0.b, z2.b, z2.b
-; CHECK-NEXT:    add z7.b, z7.b, z7.b
-; CHECK-NEXT:    splice z1.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    splice z2.b, p0, { z18.b, z19.b }
-; CHECK-NEXT:    splice z16.b, p0, { z20.b, z21.b }
+; CHECK-NEXT:    uzp1 z18.b, z18.b, z18.b
+; CHECK-NEXT:    uzp1 z7.b, z7.b, z7.b
+; CHECK-NEXT:    uzp1 z21.b, z21.b, z21.b
+; CHECK-NEXT:    uzp1 z20.b, z20.b, z20.b
+; CHECK-NEXT:    splice z3.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    uzp1 z6.b, z6.b, z6.b
+; CHECK-NEXT:    uzp1 z17.b, z23.b, z23.b
+; CHECK-NEXT:    uzp1 z16.b, z22.b, z22.b
+; CHECK-NEXT:    uzp1 z5.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    splice z18.b, p0, { z18.b, z19.b }
+; CHECK-NEXT:    splice z19.b, p0, { z20.b, z21.b }
+; CHECK-NEXT:    splice z6.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    splice z7.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    add z2.b, z2.b, z2.b
 ; CHECK-NEXT:    splice z4.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    add z6.b, z6.b, z6.b
 ; CHECK-NEXT:    add z3.b, z3.b, z3.b
-; CHECK-NEXT:    stp q0, q7, [x1, #96]
-; CHECK-NEXT:    add z0.b, z1.b, z1.b
-; CHECK-NEXT:    add z1.b, z2.b, z2.b
-; CHECK-NEXT:    add z2.b, z16.b, z16.b
-; CHECK-NEXT:    stp q6, q3, [x1, #64]
-; CHECK-NEXT:    add z3.b, z4.b, z4.b
-; CHECK-NEXT:    stp q0, q1, [x1, #32]
-; CHECK-NEXT:    stp q2, q3, [x1]
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    add z5.b, z18.b, z18.b
+; CHECK-NEXT:    add z1.b, z19.b, z19.b
+; CHECK-NEXT:    stp q2, q3, [x1, #96]
+; CHECK-NEXT:    add z2.b, z6.b, z6.b
+; CHECK-NEXT:    add z3.b, z7.b, z7.b
+; CHECK-NEXT:    add z4.b, z4.b, z4.b
+; CHECK-NEXT:    add z0.b, z0.b, z0.b
+; CHECK-NEXT:    stp q5, q1, [x1, #64]
+; CHECK-NEXT:    stp q2, q3, [x1, #32]
+; CHECK-NEXT:    stp q4, q0, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8:
@@ -1183,11 +1182,10 @@ define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z2.h, z1.h, z1.h
-; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i32_v8i8:
@@ -1227,10 +1225,9 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind {
 ; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v16i32_v16i8:
@@ -1282,25 +1279,25 @@ define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #64]
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z17.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z16.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z19.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z18.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
-; CHECK-NEXT:    splice z4.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    splice z5.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z7.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z2.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z6.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    splice z1.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z1.b, z3.b, z3.b
+; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z1.b, p0, { z1.b, z2.b }
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
 ; CHECK-NEXT:    stp q1, q0, [x1]
@@ -1429,56 +1426,56 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #160]
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    ldp q16, q17, [x0, #128]
+; CHECK-NEXT:    ldp q0, q1, [x0, #64]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #96]
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
-; CHECK-NEXT:    ldp q3, q18, [x0, #128]
-; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
-; CHECK-NEXT:    ldp q2, q19, [x0, #192]
-; CHECK-NEXT:    ldp q0, q1, [x0, #64]
-; CHECK-NEXT:    uzp1 z21.h, z18.h, z18.h
-; CHECK-NEXT:    ldp q18, q22, [x0, #224]
-; CHECK-NEXT:    uzp1 z20.h, z3.h, z3.h
-; CHECK-NEXT:    ldp q3, q23, [x0, #32]
-; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z27.h, z19.h, z19.h
-; CHECK-NEXT:    uzp1 z25.h, z22.h, z22.h
-; CHECK-NEXT:    uzp1 z26.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z24.h, z18.h, z18.h
-; CHECK-NEXT:    uzp1 z18.h, z23.h, z23.h
-; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z22.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
-; CHECK-NEXT:    splice z1.h, p0, { z20.h, z21.h }
-; CHECK-NEXT:    splice z6.h, p0, { z24.h, z25.h }
-; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z26.h, z27.h }
-; CHECK-NEXT:    splice z7.h, p0, { z17.h, z18.h }
-; CHECK-NEXT:    uzp1 z17.b, z16.b, z16.b
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    ldp q18, q19, [x0, #192]
+; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
+; CHECK-NEXT:    ldp q20, q21, [x0, #224]
+; CHECK-NEXT:    uzp1 z16.h, z16.h, z16.h
+; CHECK-NEXT:    ldp q22, q23, [x0, #32]
 ; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    splice z3.h, p0, { z22.h, z23.h }
+; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
+; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z21.h, z21.h, z21.h
+; CHECK-NEXT:    uzp1 z20.h, z20.h, z20.h
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z23.h, z23.h, z23.h
+; CHECK-NEXT:    uzp1 z22.h, z22.h, z22.h
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    splice z3.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    splice z16.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    splice z17.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z18.h, p0, { z22.h, z23.h }
+; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z16.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    uzp1 z1.b, z3.b, z3.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    uzp1 z20.b, z16.b, z16.b
+; CHECK-NEXT:    uzp1 z19.b, z17.b, z17.b
+; CHECK-NEXT:    uzp1 z7.b, z18.b, z18.b
 ; CHECK-NEXT:    uzp1 z6.b, z6.b, z6.b
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z1.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z0.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT:    splice z7.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    uzp1 z2.b, z4.b, z4.b
-; CHECK-NEXT:    splice z4.b, p0, { z5.b, z6.b }
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
-; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    add z2.b, z7.b, z7.b
-; CHECK-NEXT:    add z3.b, z4.b, z4.b
-; CHECK-NEXT:    add z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT:    splice z1.b, p0, { z1.b, z2.b }
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z19.b, z20.b }
+; CHECK-NEXT:    splice z4.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    stp q2, q3, [x1, #32]
-; CHECK-NEXT:    stp q0, q1, [x1]
+; CHECK-NEXT:    add z0.b, z0.b, z0.b
+; CHECK-NEXT:    add z3.b, z4.b, z4.b
+; CHECK-NEXT:    add z2.b, z2.b, z2.b
+; CHECK-NEXT:    stp q1, q0, [x1, #32]
+; CHECK-NEXT:    stp q3, q2, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8:
@@ -1767,10 +1764,9 @@ define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z2.h, z1.h, z1.h
-; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i32_v8i16:
@@ -1884,16 +1880,16 @@ define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    uzp1 z7.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
-; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z3.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z2.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z17.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
 ; CHECK-NEXT:    splice z0.h, p0, { z6.h, z7.h }
-; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
-; CHECK-NEXT:    splice z1.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    splice z3.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z3.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    add z0.h, z0.h, z0.h
 ; CHECK-NEXT:    add z1.h, z1.h, z1.h
 ; CHECK-NEXT:    add z2.h, z2.h, z2.h
@@ -2027,49 +2023,49 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #192]
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    ldp q16, q17, [x0, #224]
+; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0]
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
 ; CHECK-NEXT:    ldp q6, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
-; CHECK-NEXT:    ldp q3, q18, [x0, #224]
-; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
-; CHECK-NEXT:    ldp q2, q19, [x0, #128]
-; CHECK-NEXT:    ldp q0, q1, [x0, #32]
-; CHECK-NEXT:    uzp1 z21.h, z18.h, z18.h
-; CHECK-NEXT:    ldp q18, q22, [x0, #160]
-; CHECK-NEXT:    uzp1 z20.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z24.h, z19.h, z19.h
-; CHECK-NEXT:    ldp q3, q19, [x0, #96]
-; CHECK-NEXT:    uzp1 z23.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z26.h, z22.h, z22.h
-; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z25.h, z18.h, z18.h
-; CHECK-NEXT:    splice z7.h, p0, { z20.h, z21.h }
-; CHECK-NEXT:    uzp1 z21.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    ldp q18, q19, [x0, #128]
+; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
+; CHECK-NEXT:    ldp q20, q21, [x0, #160]
+; CHECK-NEXT:    uzp1 z16.h, z16.h, z16.h
+; CHECK-NEXT:    ldp q22, q23, [x0, #96]
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
-; CHECK-NEXT:    uzp1 z20.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z16.h, z6.h, z6.h
-; CHECK-NEXT:    splice z6.h, p0, { z23.h, z24.h }
-; CHECK-NEXT:    uzp1 z18.h, z3.h, z3.h
-; CHECK-NEXT:    splice z3.h, p0, { z25.h, z26.h }
-; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
-; CHECK-NEXT:    add z0.h, z2.h, z2.h
-; CHECK-NEXT:    add z7.h, z7.h, z7.h
-; CHECK-NEXT:    splice z1.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z16.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z21.h, z21.h, z21.h
+; CHECK-NEXT:    uzp1 z20.h, z20.h, z20.h
+; CHECK-NEXT:    splice z3.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z17.h, z23.h, z23.h
+; CHECK-NEXT:    uzp1 z16.h, z22.h, z22.h
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    splice z18.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z19.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    splice z7.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    add z2.h, z2.h, z2.h
 ; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    add z6.h, z6.h, z6.h
 ; CHECK-NEXT:    add z3.h, z3.h, z3.h
-; CHECK-NEXT:    stp q0, q7, [x1, #96]
-; CHECK-NEXT:    add z0.h, z1.h, z1.h
-; CHECK-NEXT:    add z1.h, z2.h, z2.h
-; CHECK-NEXT:    add z2.h, z16.h, z16.h
-; CHECK-NEXT:    stp q6, q3, [x1, #64]
-; CHECK-NEXT:    add z3.h, z4.h, z4.h
-; CHECK-NEXT:    stp q0, q1, [x1, #32]
-; CHECK-NEXT:    stp q2, q3, [x1]
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    add z5.h, z18.h, z18.h
+; CHECK-NEXT:    add z1.h, z19.h, z19.h
+; CHECK-NEXT:    stp q2, q3, [x1, #96]
+; CHECK-NEXT:    add z2.h, z6.h, z6.h
+; CHECK-NEXT:    add z3.h, z7.h, z7.h
+; CHECK-NEXT:    add z4.h, z4.h, z4.h
+; CHECK-NEXT:    add z0.h, z0.h, z0.h
+; CHECK-NEXT:    stp q5, q1, [x1, #64]
+; CHECK-NEXT:    stp q2, q3, [x1, #32]
+; CHECK-NEXT:    stp q4, q0, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v64i32_v64i16:
@@ -2362,11 +2358,10 @@ define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
-; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    uzp1 z2.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    splice z0.s, p0, { z1.s, z2.s }
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v4i64_v4i8:
@@ -2400,11 +2395,10 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind {
 ; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i64_v8i8:
@@ -2444,30 +2438,29 @@ define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind {
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #64]
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z17.s, z1.s, z1.s
-; CHECK-NEXT:    uzp1 z16.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z19.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z1.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z18.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z0.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
-; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    splice z1.s, p0, { z18.s, z19.s }
 ; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z1.s, p0, { z4.s, z5.s }
+; CHECK-NEXT:    splice z3.s, p0, { z6.s, z7.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
-; CHECK-NEXT:    splice z2.h, p0, { z3.h, z4.h }
+; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z0.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v16i64_v16i8:
@@ -2525,57 +2518,57 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q5, q6, [x0, #224]
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z17.s, z6.s, z6.s
-; CHECK-NEXT:    ldp q6, q18, [x0, #192]
-; CHECK-NEXT:    uzp1 z16.s, z5.s, z5.s
-; CHECK-NEXT:    ldp q5, q19, [x0, #128]
-; CHECK-NEXT:    ldp q0, q1, [x0]
-; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
-; CHECK-NEXT:    ldp q18, q22, [x0, #160]
-; CHECK-NEXT:    uzp1 z20.s, z6.s, z6.s
-; CHECK-NEXT:    ldp q6, q23, [x0, #96]
-; CHECK-NEXT:    splice z16.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    uzp1 z27.s, z19.s, z19.s
-; CHECK-NEXT:    uzp1 z25.s, z22.s, z22.s
-; CHECK-NEXT:    uzp1 z26.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z24.s, z18.s, z18.s
-; CHECK-NEXT:    uzp1 z18.s, z23.s, z23.s
-; CHECK-NEXT:    uzp1 z23.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z17.s, z6.s, z6.s
-; CHECK-NEXT:    uzp1 z6.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z22.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z5.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
-; CHECK-NEXT:    splice z3.s, p0, { z20.s, z21.s }
-; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
-; CHECK-NEXT:    splice z0.s, p0, { z24.s, z25.s }
-; CHECK-NEXT:    splice z7.s, p0, { z26.s, z27.s }
-; CHECK-NEXT:    splice z4.s, p0, { z17.s, z18.s }
-; CHECK-NEXT:    uzp1 z17.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT:    ldp q16, q17, [x0, #192]
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    ldp q18, q19, [x0, #128]
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT:    ldp q20, q21, [x0, #160]
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    ldp q22, q23, [x0, #96]
+; CHECK-NEXT:    uzp1 z17.s, z17.s, z17.s
+; CHECK-NEXT:    uzp1 z16.s, z16.s, z16.s
 ; CHECK-NEXT:    splice z5.s, p0, { z5.s, z6.s }
-; CHECK-NEXT:    splice z6.s, p0, { z22.s, z23.s }
-; CHECK-NEXT:    splice z1.s, p0, { z1.s, z2.s }
-; CHECK-NEXT:    uzp1 z16.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z19.s, z19.s, z19.s
+; CHECK-NEXT:    uzp1 z21.s, z21.s, z21.s
+; CHECK-NEXT:    uzp1 z20.s, z20.s, z20.s
+; CHECK-NEXT:    uzp1 z18.s, z18.s, z18.s
+; CHECK-NEXT:    uzp1 z23.s, z23.s, z23.s
+; CHECK-NEXT:    uzp1 z22.s, z22.s, z22.s
+; CHECK-NEXT:    uzp1 z6.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    splice z16.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    splice z18.s, p0, { z18.s, z19.s }
+; CHECK-NEXT:    splice z17.s, p0, { z22.s, z23.s }
+; CHECK-NEXT:    splice z6.s, p0, { z6.s, z7.s }
+; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z19.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z2.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z18.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
-; CHECK-NEXT:    splice z0.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
-; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z3.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z19.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z7.h, z17.h, z17.h
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z4.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
-; CHECK-NEXT:    uzp1 z7.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z6.b, z3.b, z3.b
-; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    splice z1.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z1.b, z3.b, z3.b
+; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z3.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
+; CHECK-NEXT:    splice z1.b, p0, { z3.b, z4.b }
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
 ; CHECK-NEXT:    stp q1, q0, [x1]
@@ -2733,11 +2726,10 @@ define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
-; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    uzp1 z2.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    splice z0.s, p0, { z1.s, z2.s }
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v4i64_v4i16:
@@ -2771,10 +2763,9 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind {
 ; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i64_v8i16:
@@ -2815,25 +2806,25 @@ define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #64]
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z17.s, z1.s, z1.s
-; CHECK-NEXT:    uzp1 z16.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z1.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z19.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z0.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z18.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
-; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    splice z5.s, p0, { z18.s, z19.s }
-; CHECK-NEXT:    splice z1.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z1.s, p0, { z4.s, z5.s }
+; CHECK-NEXT:    splice z3.s, p0, { z6.s, z7.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z7.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z2.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
-; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    splice z1.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    add z0.h, z0.h, z0.h
 ; CHECK-NEXT:    add z1.h, z1.h, z1.h
 ; CHECK-NEXT:    stp q1, q0, [x1]
@@ -2915,56 +2906,56 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #160]
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    ldp q16, q17, [x0, #128]
+; CHECK-NEXT:    ldp q0, q1, [x0, #64]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #96]
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
-; CHECK-NEXT:    ldp q3, q18, [x0, #128]
-; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
-; CHECK-NEXT:    ldp q2, q19, [x0, #192]
-; CHECK-NEXT:    ldp q0, q1, [x0, #64]
-; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
-; CHECK-NEXT:    ldp q18, q22, [x0, #224]
-; CHECK-NEXT:    uzp1 z20.s, z3.s, z3.s
-; CHECK-NEXT:    ldp q3, q23, [x0, #32]
-; CHECK-NEXT:    splice z16.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    uzp1 z27.s, z19.s, z19.s
-; CHECK-NEXT:    uzp1 z25.s, z22.s, z22.s
-; CHECK-NEXT:    uzp1 z26.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z24.s, z18.s, z18.s
-; CHECK-NEXT:    uzp1 z18.s, z23.s, z23.s
-; CHECK-NEXT:    uzp1 z23.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z22.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
-; CHECK-NEXT:    uzp1 z5.s, z1.s, z1.s
-; CHECK-NEXT:    splice z1.s, p0, { z20.s, z21.s }
-; CHECK-NEXT:    splice z6.s, p0, { z24.s, z25.s }
-; CHECK-NEXT:    uzp1 z4.s, z0.s, z0.s
-; CHECK-NEXT:    splice z0.s, p0, { z26.s, z27.s }
-; CHECK-NEXT:    splice z7.s, p0, { z17.s, z18.s }
-; CHECK-NEXT:    uzp1 z17.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    ldp q18, q19, [x0, #192]
+; CHECK-NEXT:    uzp1 z17.s, z17.s, z17.s
+; CHECK-NEXT:    ldp q20, q21, [x0, #224]
+; CHECK-NEXT:    uzp1 z16.s, z16.s, z16.s
+; CHECK-NEXT:    ldp q22, q23, [x0, #32]
 ; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    splice z3.s, p0, { z22.s, z23.s }
+; CHECK-NEXT:    uzp1 z19.s, z19.s, z19.s
+; CHECK-NEXT:    uzp1 z18.s, z18.s, z18.s
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z21.s, z21.s, z21.s
+; CHECK-NEXT:    uzp1 z20.s, z20.s, z20.s
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z23.s, z23.s, z23.s
+; CHECK-NEXT:    uzp1 z22.s, z22.s, z22.s
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    splice z3.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    splice z16.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    splice z17.s, p0, { z18.s, z19.s }
+; CHECK-NEXT:    splice z18.s, p0, { z22.s, z23.s }
+; CHECK-NEXT:    splice z6.s, p0, { z6.s, z7.s }
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    splice z4.s, p0, { z4.s, z5.s }
-; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
+; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    uzp1 z20.h, z16.h, z16.h
+; CHECK-NEXT:    uzp1 z19.h, z17.h, z17.h
+; CHECK-NEXT:    uzp1 z7.h, z18.h, z18.h
 ; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z1.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    splice z7.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z2.h, z4.h, z4.h
-; CHECK-NEXT:    splice z4.h, p0, { z5.h, z6.h }
-; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    add z2.h, z7.h, z7.h
-; CHECK-NEXT:    add z3.h, z4.h, z4.h
-; CHECK-NEXT:    add z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z19.h, z20.h }
+; CHECK-NEXT:    splice z4.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    add z1.h, z1.h, z1.h
-; CHECK-NEXT:    stp q2, q3, [x1, #32]
-; CHECK-NEXT:    stp q0, q1, [x1]
+; CHECK-NEXT:    add z0.h, z0.h, z0.h
+; CHECK-NEXT:    add z3.h, z4.h, z4.h
+; CHECK-NEXT:    add z2.h, z2.h, z2.h
+; CHECK-NEXT:    stp q1, q0, [x1, #32]
+; CHECK-NEXT:    stp q3, q2, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16:
@@ -3120,10 +3111,9 @@ define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
-; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    uzp1 z2.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    splice z0.s, p0, { z1.s, z2.s }
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v4i64_v4i32:
@@ -3209,16 +3199,16 @@ define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    uzp1 z7.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z6.s, z1.s, z1.s
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
-; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z3.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z2.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z5.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z17.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z16.s, z1.s, z1.s
 ; CHECK-NEXT:    splice z0.s, p0, { z6.s, z7.s }
-; CHECK-NEXT:    uzp1 z4.s, z1.s, z1.s
-; CHECK-NEXT:    splice z1.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    splice z3.s, p0, { z4.s, z5.s }
+; CHECK-NEXT:    splice z1.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
+; CHECK-NEXT:    splice z3.s, p0, { z16.s, z17.s }
 ; CHECK-NEXT:    add z0.s, z0.s, z0.s
 ; CHECK-NEXT:    add z1.s, z1.s, z1.s
 ; CHECK-NEXT:    add z2.s, z2.s, z2.s
@@ -3297,49 +3287,49 @@ define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #192]
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    ldp q16, q17, [x0, #224]
+; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0]
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT:    ldp q6, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
-; CHECK-NEXT:    ldp q3, q18, [x0, #224]
-; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
-; CHECK-NEXT:    ldp q2, q19, [x0, #128]
-; CHECK-NEXT:    ldp q0, q1, [x0, #32]
-; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
-; CHECK-NEXT:    ldp q18, q22, [x0, #160]
-; CHECK-NEXT:    uzp1 z20.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z24.s, z19.s, z19.s
-; CHECK-NEXT:    ldp q3, q19, [x0, #96]
-; CHECK-NEXT:    uzp1 z23.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z26.s, z22.s, z22.s
-; CHECK-NEXT:    splice z2.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    uzp1 z17.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z25.s, z18.s, z18.s
-; CHECK-NEXT:    splice z7.s, p0, { z20.s, z21.s }
-; CHECK-NEXT:    uzp1 z21.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT:    ldp q18, q19, [x0, #128]
+; CHECK-NEXT:    uzp1 z17.s, z17.s, z17.s
+; CHECK-NEXT:    ldp q20, q21, [x0, #160]
+; CHECK-NEXT:    uzp1 z16.s, z16.s, z16.s
+; CHECK-NEXT:    ldp q22, q23, [x0, #96]
+; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
 ; CHECK-NEXT:    uzp1 z19.s, z19.s, z19.s
-; CHECK-NEXT:    uzp1 z20.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z5.s, z1.s, z1.s
-; CHECK-NEXT:    uzp1 z16.s, z6.s, z6.s
-; CHECK-NEXT:    splice z6.s, p0, { z23.s, z24.s }
-; CHECK-NEXT:    uzp1 z18.s, z3.s, z3.s
-; CHECK-NEXT:    splice z3.s, p0, { z25.s, z26.s }
-; CHECK-NEXT:    uzp1 z4.s, z0.s, z0.s
-; CHECK-NEXT:    add z0.s, z2.s, z2.s
-; CHECK-NEXT:    add z7.s, z7.s, z7.s
-; CHECK-NEXT:    splice z1.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    splice z2.s, p0, { z18.s, z19.s }
-; CHECK-NEXT:    splice z16.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    uzp1 z18.s, z18.s, z18.s
+; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z21.s, z21.s, z21.s
+; CHECK-NEXT:    uzp1 z20.s, z20.s, z20.s
+; CHECK-NEXT:    splice z3.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT:    uzp1 z17.s, z23.s, z23.s
+; CHECK-NEXT:    uzp1 z16.s, z22.s, z22.s
+; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    splice z18.s, p0, { z18.s, z19.s }
+; CHECK-NEXT:    splice z19.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    splice z6.s, p0, { z6.s, z7.s }
+; CHECK-NEXT:    splice z7.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    add z2.s, z2.s, z2.s
 ; CHECK-NEXT:    splice z4.s, p0, { z4.s, z5.s }
-; CHECK-NEXT:    add z6.s, z6.s, z6.s
 ; CHECK-NEXT:    add z3.s, z3.s, z3.s
-; CHECK-NEXT:    stp q0, q7, [x1, #96]
-; CHECK-NEXT:    add z0.s, z1.s, z1.s
-; CHECK-NEXT:    add z1.s, z2.s, z2.s
-; CHECK-NEXT:    add z2.s, z16.s, z16.s
-; CHECK-NEXT:    stp q6, q3, [x1, #64]
-; CHECK-NEXT:    add z3.s, z4.s, z4.s
-; CHECK-NEXT:    stp q0, q1, [x1, #32]
-; CHECK-NEXT:    stp q2, q3, [x1]
+; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
+; CHECK-NEXT:    add z5.s, z18.s, z18.s
+; CHECK-NEXT:    add z1.s, z19.s, z19.s
+; CHECK-NEXT:    stp q2, q3, [x1, #96]
+; CHECK-NEXT:    add z2.s, z6.s, z6.s
+; CHECK-NEXT:    add z3.s, z7.s, z7.s
+; CHECK-NEXT:    add z4.s, z4.s, z4.s
+; CHECK-NEXT:    add z0.s, z0.s, z0.s
+; CHECK-NEXT:    stp q5, q1, [x1, #64]
+; CHECK-NEXT:    stp q2, q3, [x1, #32]
+; CHECK-NEXT:    stp q4, q0, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
index 7b9b69e0d9b4d..1fe81b8697122 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
-; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
-; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
+; RUN: llc -enable-subreg-liveness -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
 
 
 target triple = "aarch64-unknown-linux-gnu"
@@ -10,10 +10,8 @@ define <4 x i8> @shuffle_ext_byone_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i8:
@@ -37,8 +35,6 @@ define <4 x i8> @shuffle_ext_byone_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @shuffle_ext_byone_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z0.b, z0.b[7]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.b, w8
@@ -68,8 +64,6 @@ define <8 x i8> @shuffle_ext_byone_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @shuffle_ext_byone_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.b, z0.b[15]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.b, w8
@@ -156,9 +150,7 @@ define <2 x i16> @shuffle_ext_byone_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revw z0.d, p0/m, z0.d
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i16:
@@ -177,8 +169,6 @@ define <2 x i16> @shuffle_ext_byone_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @shuffle_ext_byone_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z0.h, z0.h[3]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.h, w8
@@ -206,8 +196,6 @@ define <4 x i16> @shuffle_ext_byone_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @shuffle_ext_byone_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.h, w8
@@ -284,8 +272,6 @@ define void @shuffle_ext_byone_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @shuffle_ext_byone_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.s, w8
@@ -309,8 +295,6 @@ define <2 x i32> @shuffle_ext_byone_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @shuffle_ext_byone_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.s, w8
@@ -379,8 +363,6 @@ define void @shuffle_ext_byone_v8i32(ptr %a, ptr %b) {
 define <2 x i64> @shuffle_ext_byone_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    insr z1.d, x8
@@ -440,11 +422,9 @@ define void @shuffle_ext_byone_v4i64(ptr %a, ptr %b) {
 define <4 x half> @shuffle_ext_byone_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.h, z0.h[3]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    insr z0.h, h2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f16:
@@ -468,11 +448,9 @@ define <4 x half> @shuffle_ext_byone_v4f16(<4 x half> %op1, <4 x half> %op2) {
 define <8 x half> @shuffle_ext_byone_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z2.h, z0.h[7]
 ; CHECK-NEXT:    mov z0.d, z1.d
 ; CHECK-NEXT:    insr z0.h, h2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8f16:
@@ -545,11 +523,9 @@ define void @shuffle_ext_byone_v16f16(ptr %a, ptr %b) {
 define <2 x float> @shuffle_ext_byone_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.s, z0.s[1]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    insr z0.s, s2
-; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f32:
@@ -569,11 +545,9 @@ define <2 x float> @shuffle_ext_byone_v2f32(<2 x float> %op1, <2 x float> %op2)
 define <4 x float> @shuffle_ext_byone_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z2.s, z0.s[3]
 ; CHECK-NEXT:    mov z0.d, z1.d
 ; CHECK-NEXT:    insr z0.s, s2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f32:
@@ -638,11 +612,9 @@ define void @shuffle_ext_byone_v8f32(ptr %a, ptr %b) {
 define <2 x double> @shuffle_ext_byone_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z2.d, z0.d[1]
 ; CHECK-NEXT:    mov z0.d, z1.d
 ; CHECK-NEXT:    insr z0.d, d2
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
index 67197b3fe4e80..edc55ffaaa611 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
@@ -514,10 +514,6 @@ define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16
 ;
 ; SME2-LABEL: vector_deinterleave_nxv16i8_nxv64i8:
 ; SME2:       // %bb.0:
-; SME2-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; SME2-NEXT:    uzp { z0.b - z3.b }, { z0.b - z3.b }
 ; SME2-NEXT:    ret
   %retval = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.vector.deinterleave4.nxv64i8(<vscale x 64 x i8> %vec)
@@ -539,10 +535,6 @@ define {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 ;
 ; SME2-LABEL: vector_deinterleave_nxv8i16_nxv32i16:
 ; SME2:       // %bb.0:
-; SME2-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; SME2-NEXT:    uzp { z0.h - z3.h }, { z0.h - z3.h }
 ; SME2-NEXT:    ret
   %retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.vector.deinterleave4.nxv32i16(<vscale x 32 x i16> %vec)
@@ -564,10 +556,6 @@ define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 ;
 ; SME2-LABEL: vector_deinterleave_nxv4i32_nxv16i32:
 ; SME2:       // %bb.0:
-; SME2-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; SME2-NEXT:    uzp { z0.s - z3.s }, { z0.s - z3.s }
 ; SME2-NEXT:    ret
   %retval = call {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.vector.deinterleave4.nxv16i32(<vscale x 16 x i32> %vec)
@@ -589,22 +577,17 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 ;
 ; SME2-ALL-LABEL: vector_deinterleave_nxv2i64_nxv8i64:
 ; SME2-ALL:       // %bb.0:
-; SME2-ALL-NEXT:    uzp { z4.d, z5.d }, z2.d, z3.d
+; SME2-ALL-NEXT:    uzp { z2.d, z3.d }, z2.d, z3.d
 ; SME2-ALL-NEXT:    uzp { z0.d, z1.d }, z0.d, z1.d
-; SME2-ALL-NEXT:    uzp { z2.d, z3.d }, z0.d, z4.d
-; SME2-ALL-NEXT:    uzp { z4.d, z5.d }, z1.d, z5.d
-; SME2-ALL-NEXT:    mov z0.d, z2.d
-; SME2-ALL-NEXT:    mov z1.d, z4.d
-; SME2-ALL-NEXT:    mov z2.d, z3.d
-; SME2-ALL-NEXT:    mov z3.d, z5.d
+; SME2-ALL-NEXT:    uzp { z4.d, z5.d }, z0.d, z2.d
+; SME2-ALL-NEXT:    uzp { z2.d, z3.d }, z1.d, z3.d
+; SME2-ALL-NEXT:    mov z0.d, z4.d
+; SME2-ALL-NEXT:    mov z1.d, z2.d
+; SME2-ALL-NEXT:    mov z2.d, z5.d
 ; SME2-ALL-NEXT:    ret
 ;
 ; SME2-256-LABEL: vector_deinterleave_nxv2i64_nxv8i64:
 ; SME2-256:       // %bb.0:
-; SME2-256-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-256-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-256-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-256-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; SME2-256-NEXT:    uzp { z0.d - z3.d }, { z0.d - z3.d }
 ; SME2-256-NEXT:    ret
   %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave4.nxv8i64(<vscale x 8 x i64> %vec)
@@ -643,51 +626,39 @@ define {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 ; SME2-ALL-LABEL: vector_deinterleave_nxv2i64_nxv16i64:
 ; SME2-ALL:       // %bb.0:
 ; SME2-ALL-NEXT:    uzp { z6.d, z7.d }, z6.d, z7.d
-; SME2-ALL-NEXT:    uzp { z24.d, z25.d }, z4.d, z5.d
-; SME2-ALL-NEXT:    uzp { z26.d, z27.d }, z24.d, z6.d
+; SME2-ALL-NEXT:    uzp { z4.d, z5.d }, z4.d, z5.d
+; SME2-ALL-NEXT:    uzp { z24.d, z25.d }, z4.d, z6.d
 ; SME2-ALL-NEXT:    uzp { z2.d, z3.d }, z2.d, z3.d
 ; SME2-ALL-NEXT:    uzp { z0.d, z1.d }, z0.d, z1.d
 ; SME2-ALL-NEXT:    uzp { z28.d, z29.d }, z0.d, z2.d
-; SME2-ALL-NEXT:    uzp { z4.d, z5.d }, z28.d, z26.d
-; SME2-ALL-NEXT:    uzp { z30.d, z31.d }, z25.d, z7.d
+; SME2-ALL-NEXT:    uzp { z26.d, z27.d }, z28.d, z24.d
+; SME2-ALL-NEXT:    uzp { z6.d, z7.d }, z5.d, z7.d
 ; SME2-ALL-NEXT:    uzp { z0.d, z1.d }, z1.d, z3.d
-; SME2-ALL-NEXT:    uzp { z6.d, z7.d }, z0.d, z30.d
-; SME2-ALL-NEXT:    uzp { z24.d, z25.d }, z29.d, z27.d
-; SME2-ALL-NEXT:    uzp { z26.d, z27.d }, z1.d, z31.d
-; SME2-ALL-NEXT:    mov z0.d, z4.d
-; SME2-ALL-NEXT:    mov z1.d, z6.d
+; SME2-ALL-NEXT:    uzp { z4.d, z5.d }, z0.d, z6.d
+; SME2-ALL-NEXT:    uzp { z24.d, z25.d }, z29.d, z25.d
+; SME2-ALL-NEXT:    uzp { z6.d, z7.d }, z1.d, z7.d
+; SME2-ALL-NEXT:    mov z0.d, z26.d
+; SME2-ALL-NEXT:    mov z1.d, z4.d
 ; SME2-ALL-NEXT:    mov z2.d, z24.d
-; SME2-ALL-NEXT:    mov z3.d, z26.d
-; SME2-ALL-NEXT:    mov z4.d, z5.d
-; SME2-ALL-NEXT:    mov z5.d, z7.d
+; SME2-ALL-NEXT:    mov z3.d, z6.d
+; SME2-ALL-NEXT:    mov z4.d, z27.d
 ; SME2-ALL-NEXT:    mov z6.d, z25.d
-; SME2-ALL-NEXT:    mov z7.d, z27.d
 ; SME2-ALL-NEXT:    ret
 ;
 ; SME2-256-LABEL: vector_deinterleave_nxv2i64_nxv16i64:
 ; SME2-256:       // %bb.0:
-; SME2-256-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-256-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; SME2-256-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-256-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; SME2-256-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-256-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; SME2-256-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-256-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
-; SME2-256-NEXT:    uzp { z28.d - z31.d }, { z4.d - z7.d }
+; SME2-256-NEXT:    uzp { z4.d - z7.d }, { z4.d - z7.d }
 ; SME2-256-NEXT:    uzp { z0.d - z3.d }, { z0.d - z3.d }
-; SME2-256-NEXT:    uzp { z4.d, z5.d }, z0.d, z28.d
-; SME2-256-NEXT:    uzp { z6.d, z7.d }, z1.d, z29.d
-; SME2-256-NEXT:    uzp { z24.d, z25.d }, z2.d, z30.d
-; SME2-256-NEXT:    uzp { z26.d, z27.d }, z3.d, z31.d
-; SME2-256-NEXT:    mov z0.d, z4.d
-; SME2-256-NEXT:    mov z1.d, z6.d
+; SME2-256-NEXT:    uzp { z26.d, z27.d }, z0.d, z4.d
+; SME2-256-NEXT:    uzp { z4.d, z5.d }, z1.d, z5.d
+; SME2-256-NEXT:    uzp { z24.d, z25.d }, z2.d, z6.d
+; SME2-256-NEXT:    uzp { z6.d, z7.d }, z3.d, z7.d
+; SME2-256-NEXT:    mov z0.d, z26.d
+; SME2-256-NEXT:    mov z1.d, z4.d
 ; SME2-256-NEXT:    mov z2.d, z24.d
-; SME2-256-NEXT:    mov z3.d, z26.d
-; SME2-256-NEXT:    mov z4.d, z5.d
-; SME2-256-NEXT:    mov z5.d, z7.d
+; SME2-256-NEXT:    mov z3.d, z6.d
+; SME2-256-NEXT:    mov z4.d, z27.d
 ; SME2-256-NEXT:    mov z6.d, z25.d
-; SME2-256-NEXT:    mov z7.d, z27.d
 ; SME2-256-NEXT:    ret
   %retval = call {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.vector.deinterleave8.nxv16i64(<vscale x 16 x i64> %vec)
   ret {<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>} %retval
@@ -759,11 +730,10 @@ define {<vscale x 4 x i64>, <vscale x 4 x i64>} @vector_deinterleave_nxv4i64_nxv
 ; SME2-LABEL: vector_deinterleave_nxv4i64_nxv8i64:
 ; SME2:       // %bb.0:
 ; SME2-NEXT:    uzp { z4.d, z5.d }, z0.d, z1.d
-; SME2-NEXT:    uzp { z6.d, z7.d }, z2.d, z3.d
+; SME2-NEXT:    uzp { z2.d, z3.d }, z2.d, z3.d
 ; SME2-NEXT:    mov z0.d, z4.d
-; SME2-NEXT:    mov z1.d, z6.d
+; SME2-NEXT:    mov z1.d, z2.d
 ; SME2-NEXT:    mov z2.d, z5.d
-; SME2-NEXT:    mov z3.d, z7.d
 ; SME2-NEXT:    ret
   %retval = call {<vscale x 4 x i64>, <vscale x 4 x i64>} @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %vec)
   ret {<vscale x 4 x i64>, <vscale x 4 x i64>} %retval
@@ -794,15 +764,14 @@ define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv
 ; SME2-NEXT:    uzp { z24.d, z25.d }, z0.d, z1.d
 ; SME2-NEXT:    uzp { z26.d, z27.d }, z2.d, z3.d
 ; SME2-NEXT:    uzp { z28.d, z29.d }, z4.d, z5.d
-; SME2-NEXT:    uzp { z30.d, z31.d }, z6.d, z7.d
+; SME2-NEXT:    uzp { z6.d, z7.d }, z6.d, z7.d
 ; SME2-NEXT:    mov z0.d, z24.d
 ; SME2-NEXT:    mov z1.d, z26.d
 ; SME2-NEXT:    mov z2.d, z28.d
-; SME2-NEXT:    mov z3.d, z30.d
+; SME2-NEXT:    mov z3.d, z6.d
 ; SME2-NEXT:    mov z4.d, z25.d
 ; SME2-NEXT:    mov z5.d, z27.d
 ; SME2-NEXT:    mov z6.d, z29.d
-; SME2-NEXT:    mov z7.d, z31.d
 ; SME2-NEXT:    ret
   %retval = call {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv16i64(<vscale x 16 x i64> %vec)
   ret {<vscale x 8 x i64>, <vscale x 8 x i64>} %retval
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
index 49f185c4312a2..2138af6b43b89 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mattr=+sve | FileCheck %s -check-prefixes=CHECK,SVE
-; RUN: llc < %s -mattr=+sve,+sme2 | FileCheck %s -check-prefixes=CHECK,SVE
-; RUN: llc < %s -mattr=+sme2 -force-streaming | FileCheck %s -check-prefixes=CHECK,SME2,SME-ALL
-; RUN: llc < %s -mattr=+sme2 -force-streaming -aarch64-sve-vector-bits-min=256 | FileCheck %s -check-prefixes=CHECK,SME2,SME2-256
+; RUN: llc < %s -mattr=+sve -enable-subreg-liveness=true | FileCheck %s -check-prefixes=CHECK,SVE
+; RUN: llc < %s -mattr=+sve,+sme2 -enable-subreg-liveness=true | FileCheck %s -check-prefixes=CHECK,SVE
+; RUN: llc < %s -mattr=+sme2 -force-streaming -enable-subreg-liveness=true | FileCheck %s -check-prefixes=CHECK,SME2,SME-ALL
+; RUN: llc < %s -mattr=+sme2 -force-streaming -aarch64-sve-vector-bits-min=256 -enable-subreg-liveness=true | FileCheck %s -check-prefixes=CHECK,SME2,SME2-256
 
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -228,10 +228,7 @@ define <vscale x 6 x half> @interleave3_nxv6f16(<vscale x 2 x half> %vec0, <vsca
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp, #2, mul vl]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
@@ -253,11 +250,8 @@ define <vscale x 12 x half> @interleave3_nxv12f16(<vscale x 4 x half> %vec0, <vs
 ; CHECK-NEXT:    addvl sp, sp, #-5
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x28, 0x1e, 0x22 // sp + 16 + 40 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    addpl x8, sp, #4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp, #1, mul vl]
 ; CHECK-NEXT:    ldr z1, [sp]
@@ -281,10 +275,7 @@ define <vscale x 24 x half> @interleave3_nxv24f16(<vscale x 8 x half> %vec0, <vs
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
@@ -303,11 +294,8 @@ define <vscale x 6 x float> @interleave3_nxv6f32(<vscale x 2 x float> %vec0, <vs
 ; CHECK-NEXT:    addvl sp, sp, #-5
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x28, 0x1e, 0x22 // sp + 16 + 40 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    addpl x8, sp, #4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp, #1, mul vl]
 ; CHECK-NEXT:    ldr z1, [sp]
@@ -331,10 +319,7 @@ define <vscale x 12 x float> @interleave3_nxv12f32(<vscale x 4 x float> %vec0, <
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
@@ -353,10 +338,7 @@ define <vscale x 6 x double> @interleave3_nxv6f64(<vscale x 2 x double> %vec0, <
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
@@ -375,10 +357,7 @@ define <vscale x 6 x bfloat> @interleave3_nxv6bf16(<vscale x 2 x bfloat> %vec0,
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp, #2, mul vl]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
@@ -400,11 +379,8 @@ define <vscale x 12 x bfloat> @interleave3_nxv12bf16(<vscale x 4 x bfloat> %vec0
 ; CHECK-NEXT:    addvl sp, sp, #-5
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x09, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x11, 0x28, 0x1e, 0x22 // sp + 16 + 40 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    addpl x8, sp, #4
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp, #1, mul vl]
 ; CHECK-NEXT:    ldr z1, [sp]
@@ -428,10 +404,7 @@ define <vscale x 24 x bfloat> @interleave3_nxv24bf16(<vscale x 8 x bfloat> %vec0
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
@@ -452,10 +425,7 @@ define <vscale x 48 x i8> @interleave3_nxv48i8(<vscale x 16 x i8> %vec0, <vscale
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
@@ -474,10 +444,7 @@ define <vscale x 24 x i16> @interleave3_nxv24i16(<vscale x 8 x i16> %vec0, <vsca
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
@@ -496,10 +463,7 @@ define <vscale x 12 x i32> @interleave3_nxv12i32(<vscale x 4 x i32> %vec0, <vsca
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
@@ -518,10 +482,7 @@ define <vscale x 6 x i64> @interleave3_nxv6i64(<vscale x 2 x i64> %vec0, <vscale
 ; CHECK-NEXT:    addvl sp, sp, #-3
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [sp]
 ; CHECK-NEXT:    ldr z0, [sp]
 ; CHECK-NEXT:    ldr z1, [sp, #1, mul vl]
@@ -548,10 +509,6 @@ define <vscale x 64 x i8> @interleave4_nxv16i8(<vscale x 16 x i8> %vec0, <vscale
 ;
 ; SME2-LABEL: interleave4_nxv16i8:
 ; SME2:       // %bb.0:
-; SME2-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; SME2-NEXT:    zip { z0.b - z3.b }, { z0.b - z3.b }
 ; SME2-NEXT:    ret
   %retval = call <vscale x 64 x i8> @llvm.vector.interleave4.nxv16i8(<vscale x 16 x i8> %vec0, <vscale x 16 x i8> %vec1, <vscale x 16 x i8> %vec2, <vscale x 16 x i8> %vec3)
@@ -573,10 +530,6 @@ define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscal
 ;
 ; SME2-LABEL: interleave4_nxv8i16:
 ; SME2:       // %bb.0:
-; SME2-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; SME2-NEXT:    zip { z0.h - z3.h }, { z0.h - z3.h }
 ; SME2-NEXT:    ret
   %retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv32i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
@@ -598,10 +551,6 @@ define <vscale x 16 x i32> @interleave4_nxv4i32(<vscale x 4 x i32> %vec0, <vscal
 ;
 ; SME2-LABEL: interleave4_nxv4i32:
 ; SME2:       // %bb.0:
-; SME2-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; SME2-NEXT:    zip { z0.s - z3.s }, { z0.s - z3.s }
 ; SME2-NEXT:    ret
   %retval = call <vscale x 16 x i32> @llvm.vector.interleave4.nxv4i32(<vscale x 4 x i32> %vec0, <vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, <vscale x 4 x i32> %vec3)
@@ -631,10 +580,6 @@ define <vscale x 8 x i64> @interleave4_nxv8i64(<vscale x 2 x i64> %vec0, <vscale
 ;
 ; SME2-256-LABEL: interleave4_nxv8i64:
 ; SME2-256:       // %bb.0:
-; SME2-256-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-256-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-256-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; SME2-256-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; SME2-256-NEXT:    zip { z0.d - z3.d }, { z0.d - z3.d }
 ; SME2-256-NEXT:    ret
   %retval = call <vscale x 8 x i64> @llvm.vector.interleave4.nxv8i64(<vscale x 2 x i64> %vec0, <vscale x 2 x i64> %vec1, <vscale x 2 x i64> %vec2, <vscale x 2 x i64> %vec3)
@@ -692,9 +637,9 @@ define <vscale x 16 x i64> @interleave8_nxv16i64(<vscale x 2 x i64> %vec0, <vsca
 ; SME2-256-NEXT:    zip { z6.d, z7.d }, z2.d, z6.d
 ; SME2-256-NEXT:    zip { z24.d, z25.d }, z1.d, z5.d
 ; SME2-256-NEXT:    zip { z0.d, z1.d }, z0.d, z4.d
-; SME2-256-NEXT:    mov z28.d, z0.d
 ; SME2-256-NEXT:    mov z29.d, z24.d
 ; SME2-256-NEXT:    mov z30.d, z6.d
+; SME2-256-NEXT:    mov z28.d, z0.d
 ; SME2-256-NEXT:    mov z31.d, z26.d
 ; SME2-256-NEXT:    mov z24.d, z1.d
 ; SME2-256-NEXT:    mov z26.d, z7.d
@@ -896,8 +841,6 @@ define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) {
 define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %b) {
 ; SVE-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
 ; SVE:       // %bb.0:
-; SVE-NEXT:    // kill: def $w1 killed $w1 def $x1
-; SVE-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; SVE-NEXT:    mov z0.d, x0
 ; SVE-NEXT:    mov z1.d, x1
 ; SVE-NEXT:    zip2 z2.d, z0.d, z1.d
@@ -907,8 +850,6 @@ define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %
 ;
 ; SME2-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
 ; SME2:       // %bb.0:
-; SME2-NEXT:    // kill: def $w1 killed $w1 def $x1
-; SME2-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; SME2-NEXT:    mov z0.d, x0
 ; SME2-NEXT:    mov z1.d, x1
 ; SME2-NEXT:    zip { z0.d, z1.d }, z0.d, z1.d
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
index 8e53a82401e0b..c018b38a4eb6c 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sve2,+lut | FileCheck %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sve,+sme2,+lut | FileCheck %s
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sme2,+lut --force-streaming | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sve2,+lut -enable-subreg-liveness=true | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sve,+sme2,+lut -enable-subreg-liveness=true | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+sme2,+lut --force-streaming -enable-subreg-liveness=true | FileCheck %s
 
 define <vscale x 16 x i8> @test_luti2_lane_i8(<vscale x 16 x i8> %table, <vscale x 16 x i8> %indices){
 ; CHECK-LABEL: test_luti2_lane_i8:
@@ -79,7 +79,7 @@ define <vscale x 8 x i16> @test_luti4_lane_i16_x2(<vscale x 8 x i16> %table, <vs
 ; CHECK-LABEL: test_luti4_lane_i16_x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z0.d
 ; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.x2.nxv8i16(<vscale x 8 x i16> %table, <vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0)
@@ -90,7 +90,7 @@ define <vscale x 8 x half> @test_luti4_lane_f16_x2(<vscale x 8 x half> %table, <
 ; CHECK-LABEL: test_luti4_lane_f16_x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z0.d
 ; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.x2.nxv8f16(<vscale x 8 x half> %table, <vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0)
@@ -101,7 +101,7 @@ define <vscale x 8 x bfloat> @test_luti4_lane_bf16_x2(<vscale x 8 x bfloat> %tab
 ; CHECK-LABEL: test_luti4_lane_bf16_x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z3.d, z0.d
 ; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0)
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
index 5f09ab2458f53..6e89e66e0e3b4 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve,+sme < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
 
 ;
 ; WHILEGE
@@ -174,6 +174,8 @@ define <vscale x 16 x i1> @whilehs_b_ii() {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, #2 // =0x2
 ; CHECK-NEXT:    mov w9, #8 // =0x8
+; CHECK-NEXT:    // kill: def $x8 killed $w8
+; CHECK-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-NEXT:    whilehs p0.b, x9, x8
 ; CHECK-NEXT:    ret
 entry:
@@ -352,6 +354,8 @@ define <vscale x 16 x i1> @whilehi_b_ii() {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, #2 // =0x2
 ; CHECK-NEXT:    mov w9, #8 // =0x8
+; CHECK-NEXT:    // kill: def $x8 killed $w8
+; CHECK-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-NEXT:    whilehi p0.b, x9, x8
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-dots-partial-reduction.ll b/llvm/test/CodeGen/AArch64/sve2p1-dots-partial-reduction.ll
index 51673282bd8ff..c2c7c44e94e13 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-dots-partial-reduction.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-dots-partial-reduction.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -enable-subreg-liveness=true < %s | FileCheck %s
 
 define <vscale x 4 x i32> @udot_vl128(<vscale x 4 x i32> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: udot_vl128:
@@ -73,11 +73,7 @@ entry:
 define <4 x i32> @fixed_udot_s_h(<4 x i32> %acc, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: fixed_udot_s_h:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udot z0.s, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %a.wide = zext <8 x i16> %a to <8 x i32>
@@ -90,11 +86,7 @@ entry:
 define <4 x i32> @fixed_sdot_s_h(<4 x i32> %acc, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: fixed_sdot_s_h:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdot z0.s, z1.h, z2.h
-; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %a.wide = sext <8 x i16> %a to <8 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll
index 952e0ecfb5343..39f818ba2eb86 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-crypto.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sve-aes2 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve-aes2,+ssve-aes -force-streaming < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve,+sve-aes2 < %s | FileCheck %s
+; RUN: llc -enable-subreg-liveness -mtriple=aarch64-linux-gnu -mattr=+sve-aes2,+ssve-aes -force-streaming < %s | FileCheck %s
 
 ;
 ; AESE
@@ -9,8 +9,6 @@
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aese_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: aese_x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    aese { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0]
 ; CHECK-NEXT:    ret
   %out = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aese.lane.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0)
@@ -20,10 +18,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aese_x2(<vscale x 16 x i8> %a
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aese_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
 ; CHECK-LABEL: aese_x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    aese { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0]
 ; CHECK-NEXT:    ret
   %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }  @llvm.aarch64.sve.aese.lane.x4(<vscale x 16 x i8> %a,
@@ -39,8 +33,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesd_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: aesd_x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    aesd { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0]
 ; CHECK-NEXT:    ret
   %out= call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesd.lane.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0)
@@ -50,10 +42,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesd_x2(<vscale x 16 x i8> %a
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aesd_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
 ; CHECK-LABEL: aesd_x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    aesd { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0]
 ; CHECK-NEXT:    ret
   %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }  @llvm.aarch64.sve.aesd.lane.x4(<vscale x 16 x i8> %a,
@@ -70,8 +58,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesemc_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: aesemc_x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    aesemc { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0]
 ; CHECK-NEXT:    ret
   %out= call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesemc.lane.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0)
@@ -81,10 +67,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesemc_x2(<vscale x 16 x i8>
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aesemc_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
 ; CHECK-LABEL: aesemc_x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    aesemc { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0]
 ; CHECK-NEXT:    ret
   %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }  @llvm.aarch64.sve.aesemc.lane.x4(<vscale x 16 x i8> %a,
@@ -100,8 +82,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesdimc_x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: aesdimc_x2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    aesdimc { z0.b, z1.b }, { z0.b, z1.b }, z2.q[0]
 ; CHECK-NEXT:    ret
   %out= call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.aesdimc.lane.x2(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, i32 0)
@@ -111,10 +91,6 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @aesdimc_x2(<vscale x 16 x i8>
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @aesdimc_x4(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) {
 ; CHECK-LABEL: aesdimc_x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    aesdimc { z0.b - z3.b }, { z0.b - z3.b }, z2.q[0]
 ; CHECK-NEXT:    ret
   %out= call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }  @llvm.aarch64.sve.aesdimc.lane.x4(<vscale x 16 x i8> %a,
@@ -145,8 +121,6 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @pmull_i64(<vscale x 2 x i64>
 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @pmlal_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) {
 ; CHECK-LABEL: pmlal_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    pmlal { z0.q, z1.q }, z2.d, z3.d
 ; CHECK-NEXT:    ret
   %out = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.pmlal.pair.x2(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c,
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
index 67e92d59a42a4..262fd054b3cad 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p1 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1 -force-streaming < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p1 -force-streaming < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+sme2p1 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1 -force-streaming -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p1 -force-streaming -enable-subreg-liveness=true < %s | FileCheck %s
 
 ;
 ; ST2Q
@@ -10,8 +10,6 @@
 define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -24,8 +22,6 @@ define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
 define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -39,8 +35,6 @@ define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -54,8 +48,6 @@ define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -69,8 +61,6 @@ define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -84,8 +74,6 @@ define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -99,8 +87,6 @@ define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -114,8 +100,6 @@ define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -130,8 +114,6 @@ define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2q_si_i8_off16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16
@@ -145,8 +127,6 @@ define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2q_si_i8_off14:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14
@@ -160,8 +140,6 @@ define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 8 x i16>, ptr %base, i64 14
@@ -175,8 +153,6 @@ define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 4 x i32>, ptr %base, i64 14
@@ -190,8 +166,6 @@ define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 2 x i64>, ptr %base, i64 14
@@ -205,8 +179,6 @@ define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 8 x half>, ptr %base, i64 14
@@ -220,8 +192,6 @@ define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 4 x float>, ptr %base, i64 14
@@ -235,8 +205,6 @@ define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep= getelementptr <vscale x 2 x double>, ptr %base, i64 14
@@ -250,8 +218,6 @@ define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 14
@@ -269,9 +235,6 @@ define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -286,9 +249,6 @@ define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
 define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2,  <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -303,9 +263,6 @@ define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -320,9 +277,6 @@ define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -337,9 +291,6 @@ define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -354,9 +305,6 @@ define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -371,9 +319,6 @@ define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -388,9 +333,6 @@ define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -405,9 +347,6 @@ define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i8_off24:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24
@@ -422,9 +361,6 @@ define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i8_off21:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21
@@ -439,9 +375,6 @@ define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2,  <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 21
@@ -456,9 +389,6 @@ define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 21
@@ -473,9 +403,6 @@ define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,<vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 21
@@ -490,9 +417,6 @@ define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,<vscale
 define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 21
@@ -507,9 +431,6 @@ define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 21
@@ -524,9 +445,6 @@ define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 21
@@ -541,9 +459,6 @@ define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 21
@@ -561,10 +476,6 @@ define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -580,10 +491,6 @@ define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
 define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -599,10 +506,6 @@ define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -618,10 +521,6 @@ define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -637,10 +536,6 @@ define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -656,10 +551,6 @@ define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -675,10 +566,6 @@ define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -694,10 +581,6 @@ define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -713,10 +596,6 @@ define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i8_off32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32
@@ -732,10 +611,6 @@ define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i8_off28:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28
@@ -751,10 +626,6 @@ define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3,  <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 28
@@ -770,10 +641,6 @@ define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base1 = getelementptr <vscale x 4 x i32>, ptr %addr, i64 28
@@ -789,10 +656,6 @@ define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 28
@@ -808,10 +671,6 @@ define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 28
@@ -827,10 +686,6 @@ define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2,<vscale x 4 x float> %v3,  <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 28
@@ -846,10 +701,6 @@ define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28
@@ -865,10 +716,6 @@ define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st4q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
-; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 28
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
index be03cfee1eec3..f4c6b1325b060 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
@@ -8,17 +8,17 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.b - z3.b }, pn8, { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.b - z3.b }, pn8, { z4.b - z7.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -32,17 +32,17 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -56,17 +56,17 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -80,17 +80,17 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -104,17 +104,17 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z4.s - z7.s }, { z24.s - z27.s }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -128,17 +128,17 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z4.s - z7.s }, { z24.s - z27.s }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -152,17 +152,17 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z4.d - z7.d }, { z24.d - z27.d }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -176,17 +176,17 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
+; CHECK-NEXT:    mov z26.d, z7.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z30.d, z3.d
-; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z7.d, z4.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ldr z27, [x0]
-; CHECK-NEXT:    mov z28.d, z1.d
-; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    mov z5.d, z2.d
+; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z4.d - z7.d }, { z24.d - z27.d }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
index 48366306141f6..f0e9efbe86ab2 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p1 -force-streaming < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -enable-misched=0 -mattr=+sve2p1 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -enable-misched=0 -mattr=+sme2 -force-streaming -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -enable-misched=0 -mattr=+sme,+sve2p1 -force-streaming -enable-subreg-liveness=true < %s | FileCheck %s
 
 ; == Normal Multi-Vector Consecutive Stores ==
 
@@ -11,9 +11,9 @@ define void @st1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vsc
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1b { z2.b, z3.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -29,9 +29,9 @@ define void @st1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -47,9 +47,9 @@ define void @st1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -65,9 +65,9 @@ define void @st1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -83,9 +83,9 @@ define void @st1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -101,9 +101,9 @@ define void @st1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -119,9 +119,9 @@ define void @st1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -137,9 +137,9 @@ define void @st1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -155,11 +155,11 @@ define void @st1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vsc
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1b { z4.b - z7.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -175,11 +175,11 @@ define void @st1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -195,11 +195,11 @@ define void @st1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -215,11 +215,11 @@ define void @st1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -235,11 +235,11 @@ define void @st1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -255,11 +255,11 @@ define void @st1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -275,11 +275,11 @@ define void @st1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -295,11 +295,11 @@ define void @st1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -317,9 +317,9 @@ define void @stnt1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1b { z2.b, z3.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -335,9 +335,9 @@ define void @stnt1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -353,9 +353,9 @@ define void @stnt1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -371,9 +371,9 @@ define void @stnt1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -389,9 +389,9 @@ define void @stnt1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -407,9 +407,9 @@ define void @stnt1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -425,9 +425,9 @@ define void @stnt1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -443,9 +443,9 @@ define void @stnt1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -461,11 +461,11 @@ define void @stnt1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1b { z4.b - z7.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -481,11 +481,11 @@ define void @stnt1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -501,11 +501,11 @@ define void @stnt1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -521,11 +521,11 @@ define void @stnt1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -541,11 +541,11 @@ define void @stnt1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -561,11 +561,11 @@ define void @stnt1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -581,11 +581,11 @@ define void @stnt1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -601,11 +601,11 @@ define void @stnt1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    mov z6.d, z3.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Spill
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
index 198d3c9215846..53436794b5b3c 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
@@ -97,11 +97,11 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @uzp_x4_f64(<vscale x 4 x double> %unused, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) nounwind {
 ; CHECK-LABEL: uzp_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z27.d, z5.d
-; CHECK-NEXT:    mov z26.d, z4.d
-; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    uzp { z0.d - z3.d }, { z24.d - z27.d }
+; CHECK-NEXT:    mov z7.d, z5.d
+; CHECK-NEXT:    mov z6.d, z4.d
+; CHECK-NEXT:    mov z5.d, z3.d
+; CHECK-NEXT:    mov z4.d, z2.d
+; CHECK-NEXT:    uzp { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.uzp.x4.nxv2f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
   ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
@@ -204,11 +204,11 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @zipq_x4_f64(<vscale x 4 x double> %unused, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) nounwind {
 ; CHECK-LABEL: zipq_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z27.d, z5.d
-; CHECK-NEXT:    mov z26.d, z4.d
-; CHECK-NEXT:    mov z25.d, z3.d
-; CHECK-NEXT:    mov z24.d, z2.d
-; CHECK-NEXT:    uzp { z0.q - z3.q }, { z24.q - z27.q }
+; CHECK-NEXT:    mov z7.d, z5.d
+; CHECK-NEXT:    mov z6.d, z4.d
+; CHECK-NEXT:    mov z5.d, z3.d
+; CHECK-NEXT:    mov z4.d, z2.d
+; CHECK-NEXT:    uzp { z0.q - z3.q }, { z4.q - z7.q }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.uzpq.x4.nxv2f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
   ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
index db7a3fa46946e..f91806a15689f 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64 -mattr=+sve,+sme2 < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64 -mattr=+sme2p1 -force-streaming < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64 -mattr=+sme,+sve2p1 -force-streaming < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve2p1 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve,+sme2 -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+sme2p1 -force-streaming -enable-subreg-liveness=true < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 -mattr=+sme,+sve2p1 -force-streaming -enable-subreg-liveness=true < %s | FileCheck %s
 
 ; == WHILEGE ==
 
@@ -10,7 +10,6 @@ define <vscale x 16 x i1> @whilege_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -21,7 +20,6 @@ define <vscale x 8 x i1> @whilege_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -32,7 +30,6 @@ define <vscale x 4 x i1> @whilege_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -43,7 +40,6 @@ define <vscale x 2 x i1> @whilege_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -57,7 +53,6 @@ define <vscale x 16 x i1> @whilegt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -68,7 +63,6 @@ define <vscale x 8 x i1> @whilegt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -79,7 +73,6 @@ define <vscale x 4 x i1> @whilegt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -90,7 +83,6 @@ define <vscale x 2 x i1> @whilegt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -104,7 +96,6 @@ define <vscale x 16 x i1> @whilehi_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -115,7 +106,6 @@ define <vscale x 8 x i1> @whilehi_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -126,7 +116,6 @@ define <vscale x 4 x i1> @whilehi_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -137,7 +126,6 @@ define <vscale x 2 x i1> @whilehi_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -151,7 +139,6 @@ define <vscale x 16 x i1> @whilehs_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -162,7 +149,6 @@ define <vscale x 8 x i1> @whilehs_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -173,7 +159,6 @@ define <vscale x 4 x i1> @whilehs_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -184,7 +169,6 @@ define <vscale x 2 x i1> @whilehs_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -198,7 +182,6 @@ define <vscale x 16 x i1> @whilele_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -209,7 +192,6 @@ define <vscale x 8 x i1> @whilele_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -220,7 +202,6 @@ define <vscale x 4 x i1> @whilele_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -231,7 +212,6 @@ define <vscale x 2 x i1> @whilele_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -245,7 +225,6 @@ define <vscale x 16 x i1> @whilelo_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -256,7 +235,6 @@ define <vscale x 8 x i1> @whilelo_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -267,7 +245,6 @@ define <vscale x 4 x i1> @whilelo_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -278,7 +255,6 @@ define <vscale x 2 x i1> @whilelo_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -292,7 +268,6 @@ define <vscale x 16 x i1> @whilels_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -303,7 +278,6 @@ define <vscale x 8 x i1> @whilels_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -314,7 +288,6 @@ define <vscale x 4 x i1> @whilels_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -325,7 +298,6 @@ define <vscale x 2 x i1> @whilels_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -339,7 +311,6 @@ define <vscale x 16 x i1> @whilelt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.b, p1.b }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -350,7 +321,6 @@ define <vscale x 8 x i1> @whilelt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.h, p1.h }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -361,7 +331,6 @@ define <vscale x 4 x i1> @whilelt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.s, p1.s }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -372,7 +341,6 @@ define <vscale x 2 x i1> @whilelt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.d, p1.d }, x0, x1
-; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0

>From 83d81f1e566e33f231284c2da1bd988f8f0aa0fc Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Wed, 14 Jan 2026 16:42:15 +0000
Subject: [PATCH 3/3] [AArch64] Add new pass after VirtRegRewriter to add
 implicit-defs (#174188)

When SubRegister Liveness Tracking (SRLT) is enabled, this pass adds
extra implicit-def's to instructions that define the low N bits of a
GPR/FPR register to represent that the top bits are written, because all
AArch64 instructions that write the low bits of a GPR/FPR also
implicitly zero the top bits.

These semantics are originally represented in the MIR using
`SUBREG_TO_REG`, but during register coalescing this information is lost
and when rewriting virtual -> physical registers the implicit-defs are
not added to represent that the top bits are written.

There have been several attempts to fix this in the coalescer (#168353),
but each iteration has exposed new bugs and the patch had to be
reverted. Additionally, the concept of adding 'implicit-def' of a
virtual register during the register allocation process is particularly
fragile and many places don't expect it (for example in
`X86::commuteInstructionImpl` the code only looks at specific operands
and does not consider implicit-defs. Similar in
`SplitEditor::addDeadDef` where it traverses operand 'defs' rather than
'all_defs').

We want a temporary solution that doesn't impact other targets and is
simpler and less intrusive than the patch proposed for the register
coalescer so that we can enable SRLT to make better use of SVE/SME
multi-vector instructions while we work on a more permanent solution
that requires rewriting a large part of the AArch64 instructions (32-bit
and NEON).

(cherry picked from commit 91f5d73b311f3622517ff1d34d21cc8ef1f52ea9)
---
 llvm/lib/Target/AArch64/AArch64.h             |   2 +
 .../Target/AArch64/AArch64RegisterInfo.cpp    |   5 +-
 .../AArch64/AArch64SRLTDefineSuperRegs.cpp    | 248 ++++++++++++++
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  |   7 +-
 llvm/lib/Target/AArch64/AArch64Subtarget.h    |   8 +-
 .../Target/AArch64/AArch64TargetMachine.cpp   |  16 +-
 llvm/lib/Target/AArch64/CMakeLists.txt        |   1 +
 llvm/test/CodeGen/AArch64/O3-pipeline.ll      |   1 +
 llvm/test/CodeGen/AArch64/active_lane_mask.ll |   2 -
 llvm/test/CodeGen/AArch64/arm64-addrmode.ll   | 130 +++----
 .../AArch64/get-active-lane-mask-extract.ll   |   9 -
 .../AArch64/intrinsic-cttz-elts-sve.ll        |   4 +-
 .../AArch64/preserve_nonecc_varargs_darwin.ll |  15 +-
 ...gister-coalesce-update-subranges-remat.mir |   1 -
 .../CodeGen/AArch64/sme-intrinsics-loads.ll   |   3 +-
 .../CodeGen/AArch64/sme-intrinsics-stores.ll  |   3 +-
 .../streaming-compatible-memory-ops.ll        |   1 -
 ...iveness-fix-subreg-to-reg-implicit-def.mir | 107 ++++++
 .../subreg_to_reg_coalescing_issue.mir        |   3 +-
 .../CodeGen/AArch64/sve-bf16-reductions.ll    |   2 -
 .../sve-intrinsics-contiguous-prefetches.ll   |   1 -
 .../sve-intrinsics-stN-reg-imm-addr-mode.ll   |   1 -
 .../CodeGen/AArch64/sve-intrinsics-while.ll   |   8 -
 ...e-streaming-mode-fixed-length-ext-loads.ll |   9 +-
 ...e-streaming-mode-fixed-length-fcopysign.ll |  49 +--
 ...-streaming-mode-fixed-length-fp-vselect.ll |   6 +-
 ...sve-streaming-mode-fixed-length-int-div.ll |   9 -
 ...streaming-mode-fixed-length-int-extends.ll | 261 ++++++--------
 ...ve-streaming-mode-fixed-length-int-mulh.ll |  56 +--
 ...-streaming-mode-fixed-length-int-shifts.ll | 324 +-----------------
 ...streaming-mode-fixed-length-int-vselect.ll |   5 +-
 .../sve-streaming-mode-fixed-length-loads.ll  |   1 -
 ...mode-fixed-length-masked-gather-scatter.ll |   3 -
 ...eaming-mode-fixed-length-optimize-ptrue.ll |   2 -
 ...eaming-mode-fixed-length-vector-shuffle.ll |   5 +-
 .../CodeGen/AArch64/sve2-intrinsics-while.ll  |   4 -
 36 files changed, 581 insertions(+), 731 deletions(-)
 create mode 100644 llvm/lib/Target/AArch64/AArch64SRLTDefineSuperRegs.cpp
 create mode 100644 llvm/test/CodeGen/AArch64/subreg-liveness-fix-subreg-to-reg-implicit-def.mir

diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index a8e15c338352a..40983714ddf1d 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -64,6 +64,7 @@ FunctionPass *createAArch64CollectLOHPass();
 FunctionPass *createSMEABIPass();
 FunctionPass *createSMEPeepholeOptPass();
 FunctionPass *createMachineSMEABIPass(CodeGenOptLevel);
+FunctionPass *createAArch64SRLTDefineSuperRegsPass();
 ModulePass *createSVEIntrinsicOptsPass();
 InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &,
@@ -117,6 +118,7 @@ void initializeLDTLSCleanupPass(PassRegistry&);
 void initializeSMEABIPass(PassRegistry &);
 void initializeSMEPeepholeOptPass(PassRegistry &);
 void initializeMachineSMEABIPass(PassRegistry &);
+void initializeAArch64SRLTDefineSuperRegsPass(PassRegistry &);
 void initializeSVEIntrinsicOptsPass(PassRegistry &);
 void initializeAArch64Arm64ECCallLoweringPass(PassRegistry &);
 } // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 098fc4528c91e..8c0dd4381fae8 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -1385,9 +1385,8 @@ bool AArch64RegisterInfo::shouldCoalesce(
   MachineFunction &MF = *MI->getMF();
   MachineRegisterInfo &MRI = MF.getRegInfo();
 
-  // Coalescing of SUBREG_TO_REG is broken when using subreg liveness tracking,
-  // we must disable it for now.
-  if (MI->isSubregToReg() && MRI.subRegLivenessEnabled())
+  if (MI->isSubregToReg() && MRI.subRegLivenessEnabled() &&
+      !MF.getSubtarget<AArch64Subtarget>().enableSRLTSubregToRegMitigation())
     return false;
 
   if (MI->isCopy() &&
diff --git a/llvm/lib/Target/AArch64/AArch64SRLTDefineSuperRegs.cpp b/llvm/lib/Target/AArch64/AArch64SRLTDefineSuperRegs.cpp
new file mode 100644
index 0000000000000..40345769a64d9
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SRLTDefineSuperRegs.cpp
@@ -0,0 +1,248 @@
+//===- AArch64SRLTDefineSuperRegs.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// When SubRegister Liveness Tracking (SRLT) is enabled, this pass adds
+// extra implicit-def's to instructions that define the low N bits of
+// a GPR/FPR register to also define the top bits, because all AArch64
+// instructions that write the low bits of a GPR/FPR also implicitly zero
+// the top bits.  For example, 'mov w0, w1' writes zeroes to the top 32-bits of
+// x0, so this pass adds an `implicit-def $x0` after register allocation.
+//
+// These semantics are originally represented in the MIR using `SUBREG_TO_REG`
+// which expresses that the top bits have been defined by the preceding
+// instructions, but during register coalescing this information is lost and in
+// contrast to when SRLT is disabled, when rewriting virtual -> physical
+// registers the implicit-defs are not added to the instruction.
+//
+// There have been several attempts to fix this in the coalescer [1], but each
+// iteration has exposed new bugs and the patch had to be reverted.
+// Additionally, the concept of adding 'implicit-def' of a virtual register is
+// particularly fragile and many places don't expect it (for example in
+// `X86::commuteInstructionImpl` the code only looks at specific operands and
+// does not consider implicit-defs. Similar in `SplitEditor::addDeadDef` where
+// it traverses operand 'defs' rather than 'all_defs').
+//
+// We want a temporary solution that doesn't impact other targets and is simpler
+// and less intrusive than the patch proposed for the register coalescer [1], so
+// that we can enable SRLT for AArch64.
+//
+// The approach here is to just add the 'implicit-def' manually after rewriting
+// virtual regs -> physical regs. This still means that during the register
+// allocation process the dependences are not accurately represented in the MIR
+// and LiveIntervals, but there are several reasons why we believe this isn't a
+// problem in practice:
+// (A) The register allocator only spills entire virtual registers.
+//     This is additionally guarded by code in
+//     AArch64InstrInfo::storeRegToStackSlot/loadRegFromStackSlot
+//     where it checks if a register matches the expected register class.
+// (B) Rematerialization only happens when the instruction writes the full
+//     register.
+// (C) The high bits of the AArch64 register cannot be written independently.
+// (D) Instructions that write only part of a register always take that same
+//     register as a tied input operand, to indicate it's a merging operation.
+//
+// (A) means that for two virtual registers of regclass GPR32 and GPR64, if the
+// GPR32 register is coalesced into the GPR64 vreg then the full GPR64 would
+// be spilled/filled even if only the low 32-bits would be required for the
+// given liverange. (B) means that the top bits of a GPR64 would never be
+// overwritten by rematerialising a GPR32 sub-register for a given liverange.
+// (C-D) means that we can assume that the MIR as input to the register
+// allocator correctly expresses the instruction behaviour and dependences
+// between values, so unless the register allocator would violate (A) or (B),
+// the MIR is otherwise sound.
+//
+// Alternative approaches have also been considered, such as:
+// (1) Changing the AArch64 instruction definitions to write all bits and
+//     extract the low N bits for the result.
+// (2) Disabling coalescing of SUBREG_TO_REG and using regalloc hints to tell
+//     the register allocator to favour the same register for the input/output.
+// (3) Adding a new coalescer guard node with a tied-operand constraint, such
+//     that when the SUBREG_TO_REG is removed, something still represents that
+//     the top bits are defined. The node would get removed before rewriting
+//     virtregs.
+// (4) Using an explicit INSERT_SUBREG into a zero value and try to optimize
+//     away the INSERT_SUBREG (this is a more explicit variant of (2) and (3))
+// (5) Adding a new MachineOperand flag that represents the top bits would be
+//     defined, but are not read nor undef.
+//
+// (1) would be the best approach but would be a significant effort as it
+// requires rewriting most/all instruction definitions and fixing MIR passes
+// that rely on the current definitions, whereas (2-4) result in sub-optimal
+// code that can't really be avoided because the explicit nodes would stop
+// rematerialization. (5) might be a way to mitigate the
+// fragility of implicit-def's of virtual registers if we want to pursue
+// landing [1], but then we'd rather choose approach (1) to avoid using
+// SUBREG_TO_REG entirely.
+//
+// [1] https://github.com/llvm/llvm-project/pull/168353
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-srlt-define-superregs"
+#define PASS_NAME "AArch64 SRLT Define Super-Regs Pass"
+
+namespace {
+
+struct AArch64SRLTDefineSuperRegs : public MachineFunctionPass {
+  inline static char ID = 0;
+
+  AArch64SRLTDefineSuperRegs() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  Register getWidestSuperReg(Register R, const BitVector &RequiredBaseRegUnits,
+                             const BitVector &QHiRegUnits);
+
+  StringRef getPassName() const override { return PASS_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addPreservedID(MachineLoopInfoID);
+    AU.addPreservedID(MachineDominatorsID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  MachineFunction *MF = nullptr;
+  const AArch64Subtarget *Subtarget = nullptr;
+  const AArch64RegisterInfo *TRI = nullptr;
+};
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(AArch64SRLTDefineSuperRegs, DEBUG_TYPE, PASS_NAME, false, false)
+
+// Returns the widest super-reg for a given reg, or NoRegister if no suitable
+// wider super-reg has been found. For example:
+//  W0    -> X0
+//  B1    -> Q1 (without SVE)
+//        -> Z1 (with SVE)
+//  W1_W2 -> X1_X2
+//  D0_D1 -> Q0_Q1 (without SVE)
+//        -> Z0_Z1 (with SVE)
+Register AArch64SRLTDefineSuperRegs::getWidestSuperReg(
+    Register R, const BitVector &RequiredBaseRegUnits,
+    const BitVector &QHiRegUnits) {
+  assert(R.isPhysical() &&
+         "Expected to be run straight after virtregrewriter!");
+
+  BitVector Units(TRI->getNumRegUnits());
+  for (MCRegUnit U : TRI->regunits(R))
+    Units.set((unsigned)U);
+
+  auto IsSuitableSuperReg = [&](Register SR) {
+    for (MCRegUnit U : TRI->regunits(SR)) {
+      // Avoid choosing z1 as super-reg of d1 if SVE is not available.
+      // Q*_HI registers are only set for SVE registers, as those consist
+      // of the Q* register for the low 128 bits and the Q*_HI (artificial)
+      // register for the top (vscale-1) * 128 bits.
+      if (QHiRegUnits.test((unsigned)U) &&
+          !Subtarget->isSVEorStreamingSVEAvailable())
+        return false;
+      // We consider a super-reg as unsuitable if any of its reg units is not
+      // artificial and not shared, as that would imply that U is a unit for a
+      // different register, which means the candidate super-reg is likely
+      // a register tuple.
+      if (!TRI->isArtificialRegUnit(U) &&
+          (!Units.test((unsigned)U) || !RequiredBaseRegUnits.test((unsigned)U)))
+        return false;
+    }
+    return true;
+  };
+
+  Register LargestSuperReg = AArch64::NoRegister;
+  for (Register SR : TRI->superregs(R))
+    if (IsSuitableSuperReg(SR) && (LargestSuperReg == AArch64::NoRegister ||
+                                   TRI->isSuperRegister(LargestSuperReg, SR)))
+      LargestSuperReg = SR;
+
+  return LargestSuperReg;
+}
+
+bool AArch64SRLTDefineSuperRegs::runOnMachineFunction(MachineFunction &MF) {
+  this->MF = &MF;
+  Subtarget = &MF.getSubtarget<AArch64Subtarget>();
+  TRI = Subtarget->getRegisterInfo();
+  const MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+  if (!MRI->subRegLivenessEnabled())
+    return false;
+
+  assert(!MRI->isSSA() && "Expected to be run after breaking down SSA form!");
+
+  auto XRegs = seq_inclusive<unsigned>(AArch64::X0, AArch64::X28);
+  auto ZRegs = seq_inclusive<unsigned>(AArch64::Z0, AArch64::Z31);
+  constexpr unsigned FixedRegs[] = {AArch64::FP, AArch64::LR, AArch64::SP};
+
+  BitVector RequiredBaseRegUnits(TRI->getNumRegUnits());
+  for (Register R : concat<unsigned>(XRegs, ZRegs, FixedRegs))
+    for (MCRegUnit U : TRI->regunits(R))
+      RequiredBaseRegUnits.set((unsigned)U);
+
+  BitVector QHiRegUnits(TRI->getNumRegUnits());
+  for (Register R : seq_inclusive<unsigned>(AArch64::Q0_HI, AArch64::Q31_HI))
+    for (MCRegUnit U : TRI->regunits(R))
+      QHiRegUnits.set((unsigned)U);
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      // PATCHPOINT may have a 'def' that's not a register, avoid this.
+      if (MI.getOpcode() == TargetOpcode::PATCHPOINT)
+        continue;
+      // For each partial register write, also add an implicit-def for top bits
+      // of the register (e.g. for w0 add a def of x0).
+      SmallSet<Register, 8> SuperRegs;
+      for (const MachineOperand &DefOp : MI.defs())
+        if (Register R = getWidestSuperReg(DefOp.getReg(), RequiredBaseRegUnits,
+                                           QHiRegUnits);
+            R != AArch64::NoRegister)
+          SuperRegs.insert(R);
+
+      if (!SuperRegs.size())
+        continue;
+
+      LLVM_DEBUG(dbgs() << "Adding implicit-defs to: " << MI);
+      for (Register R : SuperRegs) {
+        LLVM_DEBUG(dbgs() << "  " << printReg(R, TRI) << "\n");
+        bool IsRenamable = any_of(MI.defs(), [&](const MachineOperand &MO) {
+          return MO.isRenamable() && TRI->regsOverlap(MO.getReg(), R);
+        });
+        bool IsDead = any_of(MI.defs(), [&](const MachineOperand &MO) {
+          return MO.isDead() && TRI->regsOverlap(MO.getReg(), R);
+        });
+        MachineOperand DefOp = MachineOperand::CreateReg(
+            R, /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+            /*isDead=*/IsDead, /*isUndef=*/false, /*isEarlyClobber=*/false,
+            /*SubReg=*/0, /*isDebug=*/false, /*isInternalRead=*/false,
+            /*isRenamable=*/IsRenamable);
+        MI.addOperand(DefOp);
+      }
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+FunctionPass *llvm::createAArch64SRLTDefineSuperRegsPass() {
+  return new AArch64SRLTDefineSuperRegs();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 4d326dc97ca51..a642841243be3 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -355,7 +355,8 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                    unsigned MinSVEVectorSizeInBitsOverride,
                                    unsigned MaxSVEVectorSizeInBitsOverride,
                                    bool IsStreaming, bool IsStreamingCompatible,
-                                   bool HasMinSize)
+                                   bool HasMinSize,
+                                   bool EnableSRLTSubregToRegMitigation)
     : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
       ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
@@ -367,7 +368,9 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
               ? std::optional<unsigned>(AArch64StreamingHazardSize)
               : std::nullopt),
       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
-      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
+      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride),
+      EnableSRLTSubregToRegMitigation(EnableSRLTSubregToRegMitigation),
+      TargetTriple(TT),
       InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
       TLInfo(TM, *this) {
   if (AArch64::isX18ReservedByDefault(TT))
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index bd8a2d5234f2d..248e140b3101c 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -88,6 +88,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   std::optional<unsigned> StreamingHazardSize;
   unsigned MinSVEVectorSizeInBits;
   unsigned MaxSVEVectorSizeInBits;
+  bool EnableSRLTSubregToRegMitigation;
   unsigned VScaleForTuning = 1;
   TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;
 
@@ -128,7 +129,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
                    unsigned MinSVEVectorSizeInBitsOverride = 0,
                    unsigned MaxSVEVectorSizeInBitsOverride = 0,
                    bool IsStreaming = false, bool IsStreamingCompatible = false,
-                   bool HasMinSize = false);
+                   bool HasMinSize = false,
+                   bool EnableSRLTSubregToRegMitigation = false);
 
 // Getters for SubtargetFeatures defined in tablegen
 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
@@ -467,6 +469,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   /// add + cnt instructions.
   bool useScalarIncVL() const;
 
+  bool enableSRLTSubregToRegMitigation() const {
+    return EnableSRLTSubregToRegMitigation;
+  }
+
   /// Choose a method of checking LR before performing a tail call.
   AArch64PAuth::AuthCheckMethod
   getAuthenticatedLRCheckMethod(const MachineFunction &MF) const;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 1ec5a20cc0ce0..3aba866458830 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -227,6 +227,12 @@ static cl::opt<bool>
                             cl::desc("Enable new lowering for the SME ABI"),
                             cl::init(true), cl::Hidden);
 
+static cl::opt<bool> EnableSRLTSubregToRegMitigation(
+    "aarch64-srlt-mitigate-sr2r",
+    cl::desc("Enable SUBREG_TO_REG mitigation by adding 'implicit-def' for "
+             "super-regs when using Subreg Liveness Tracking"),
+    cl::init(true), cl::Hidden);
+
 extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
 LLVMInitializeAArch64Target() {
   // Register the target.
@@ -268,6 +274,7 @@ LLVMInitializeAArch64Target() {
   initializeKCFIPass(PR);
   initializeSMEABIPass(PR);
   initializeMachineSMEABIPass(PR);
+  initializeAArch64SRLTDefineSuperRegsPass(PR);
   initializeSMEPeepholeOptPass(PR);
   initializeSVEIntrinsicOptsPass(PR);
   initializeAArch64SpeculationHardeningPass(PR);
@@ -462,7 +469,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
     resetTargetOptions(F);
     I = std::make_unique<AArch64Subtarget>(
         TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
-        MaxSVEVectorSize, IsStreaming, IsStreamingCompatible, HasMinSize);
+        MaxSVEVectorSize, IsStreaming, IsStreamingCompatible, HasMinSize,
+        EnableSRLTSubregToRegMitigation);
   }
 
   if (IsStreaming && !I->hasSME())
@@ -550,6 +558,7 @@ class AArch64PassConfig : public TargetPassConfig {
   void addMachineSSAOptimization() override;
   bool addILPOpts() override;
   void addPreRegAlloc() override;
+  void addPostRewrite() override;
   void addPostRegAlloc() override;
   void addPreSched2() override;
   void addPreEmitPass() override;
@@ -815,6 +824,11 @@ void AArch64PassConfig::addPreRegAlloc() {
     addPass(&MachinePipelinerID);
 }
 
+void AArch64PassConfig::addPostRewrite() {
+  if (EnableSRLTSubregToRegMitigation)
+    addPass(createAArch64SRLTDefineSuperRegsPass());
+}
+
 void AArch64PassConfig::addPostRegAlloc() {
   // Remove redundant copy instructions.
   if (TM->getOptLevel() != CodeGenOptLevel::None &&
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 3334b3689e03f..2fe554217c1ba 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -92,6 +92,7 @@ add_llvm_target(AArch64CodeGen
   SMEPeepholeOpt.cpp
   SVEIntrinsicOpts.cpp
   MachineSMEABIPass.cpp
+  AArch64SRLTDefineSuperRegs.cpp
   AArch64SIMDInstrOpt.cpp
   AArch64PrologueEpilogue.cpp
 
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
index e4249fe4fb1c8..d137b8c9ac1e0 100644
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -191,6 +191,7 @@
 ; CHECK-NEXT:       Virtual Register Rewriter
 ; CHECK-NEXT:       Register Allocation Pass Scoring
 ; CHECK-NEXT:       Stack Slot Coloring
+; CHECK-NEXT:       AArch64 SRLT Define Super-Regs Pass
 ; CHECK-NEXT:       Machine Copy Propagation Pass
 ; CHECK-NEXT:       Machine Loop Invariant Code Motion
 ; CHECK-NEXT:       AArch64 Redundant Copy Elimination
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index 44b90dfb73703..05d083a654cf6 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -410,7 +410,6 @@ define <vscale x 4 x i1> @lane_mask_nxv4i1_imm5() {
 ; CHECK-LABEL: lane_mask_nxv4i1_imm5:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, #5 // =0x5
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    ret
 entry:
@@ -432,7 +431,6 @@ define <vscale x 16 x i1> @lane_mask_nxv16i1_imm10() {
 ; CHECK-LABEL: lane_mask_nxv16i1_imm10:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, #10 // =0xa
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelo p0.b, xzr, x8
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index c8b7035b7c6e3..f8695b62619c0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -43,7 +43,6 @@ define void @t4(ptr %object) {
 ; CHECK-LABEL: t4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #32768 // =0x8000
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    ldr xzr, [x0, x8]
 ; CHECK-NEXT:    ret
   %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4096
@@ -70,7 +69,6 @@ define void @t6(i64 %a, ptr %object) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    add x8, x1, x0, lsl #3
 ; CHECK-NEXT:    mov w9, #32768 // =0x8000
-; CHECK-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-NEXT:    ldr xzr, [x8, x9]
 ; CHECK-NEXT:    ret
   %tmp1 = getelementptr inbounds i64, ptr %object, i64 %a
@@ -84,7 +82,6 @@ define void @t7(i64 %a) {
 ; CHECK-LABEL: t7:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #65535 // =0xffff
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    ldr xzr, [x0, x8]
 ; CHECK-NEXT:    ret
   %1 = add i64 %a, 65535   ;0xffff
@@ -134,7 +131,6 @@ define void @t11(i64 %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #17767 // =0x4567
 ; CHECK-NEXT:    movk w8, #291, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    ldr xzr, [x0, x8]
 ; CHECK-NEXT:    ret
   %1 = add i64 %a, 19088743   ;0x1234567
@@ -218,10 +214,8 @@ define void @t17(i64 %a) {
 define i8 @LdOffset_i8(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrb w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrb w0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -232,10 +226,8 @@ define i8 @LdOffset_i8(ptr %a)  {
 define i32 @LdOffset_i8_zext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_zext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrb w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrb w0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -247,10 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a)  {
 define i32 @LdOffset_i8_sext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_sext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrsb w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrsb w0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -262,11 +252,8 @@ define i32 @LdOffset_i8_sext32(ptr %a)  {
 define i64 @LdOffset_i8_zext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_zext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrb w8, [x0, x8]
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrb w0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -278,10 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a)  {
 define i64 @LdOffset_i8_sext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_sext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrsb x0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrsb x0, [x8, #3704]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -293,10 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a)  {
 define i16 @LdOffset_i16(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrh w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrh w0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -307,10 +290,8 @@ define i16 @LdOffset_i16(ptr %a)  {
 define i32 @LdOffset_i16_zext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_zext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrh w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrh w0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -322,10 +303,8 @@ define i32 @LdOffset_i16_zext32(ptr %a)  {
 define i32 @LdOffset_i16_sext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_sext32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrsh w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrsh w0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -337,11 +316,8 @@ define i32 @LdOffset_i16_sext32(ptr %a)  {
 define i64 @LdOffset_i16_zext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_zext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrh w8, [x0, x8]
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrh w0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -353,10 +329,8 @@ define i64 @LdOffset_i16_zext64(ptr %a)  {
 define i64 @LdOffset_i16_sext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_sext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrsh x0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrsh x0, [x8, #7408]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
   %val = load i16, ptr %arrayidx, align 2
@@ -368,10 +342,8 @@ define i64 @LdOffset_i16_sext64(ptr %a)  {
 define i32 @LdOffset_i32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #31200 // =0x79e0
-; CHECK-NEXT:    movk w8, #63, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldr w0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldr w0, [x8, #14816]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
   %val = load i32, ptr %arrayidx, align 4
@@ -382,11 +354,8 @@ define i32 @LdOffset_i32(ptr %a)  {
 define i64 @LdOffset_i32_zext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32_zext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #31200 // =0x79e0
-; CHECK-NEXT:    movk w8, #63, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldr w8, [x0, x8]
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldr w0, [x8, #14816]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
   %val = load i32, ptr %arrayidx, align 2
@@ -398,10 +367,8 @@ define i64 @LdOffset_i32_zext64(ptr %a)  {
 define i64 @LdOffset_i32_sext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32_sext64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #31200 // =0x79e0
-; CHECK-NEXT:    movk w8, #63, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrsw x0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldrsw x0, [x8, #14816]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
   %val = load i32, ptr %arrayidx, align 2
@@ -413,10 +380,8 @@ define i64 @LdOffset_i32_sext64(ptr %a)  {
 define i64 @LdOffset_i64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #62400 // =0xf3c0
-; CHECK-NEXT:    movk w8, #126, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldr x0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT:    ldr x0, [x8, #29632]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
   %val = load i64, ptr %arrayidx, align 4
@@ -427,10 +392,8 @@ define i64 @LdOffset_i64(ptr %a)  {
 define <2 x i32> @LdOffset_v2i32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #62400 // =0xf3c0
-; CHECK-NEXT:    movk w8, #126, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldr d0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT:    ldr d0, [x8, #29632]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds <2 x i32>, ptr %a, i64 1039992
   %val = load <2 x i32>, ptr %arrayidx, align 4
@@ -441,10 +404,8 @@ define <2 x i32> @LdOffset_v2i32(ptr %a)  {
 define <2 x i64> @LdOffset_v2i64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #59264 // =0xe780
-; CHECK-NEXT:    movk w8, #253, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldr q0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #4048, lsl #12 // =16580608
+; CHECK-NEXT:    ldr q0, [x8, #59264]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds <2 x i64>, ptr %a, i64 1039992
   %val = load <2 x i64>, ptr %arrayidx, align 4
@@ -455,10 +416,8 @@ define <2 x i64> @LdOffset_v2i64(ptr %a)  {
 define double @LdOffset_i8_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #56952 // =0xde78
-; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrsb w8, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:    ldrsb w8, [x8, #3704]
 ; CHECK-NEXT:    scvtf d0, w8
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
@@ -471,10 +430,8 @@ define double @LdOffset_i8_f64(ptr %a)  {
 define double @LdOffset_i16_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #48368 // =0xbcf0
-; CHECK-NEXT:    movk w8, #31, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldrsh w8, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #506, lsl #12 // =2072576
+; CHECK-NEXT:    ldrsh w8, [x8, #7408]
 ; CHECK-NEXT:    scvtf d0, w8
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i16, ptr %a, i64 1039992
@@ -487,10 +444,8 @@ define double @LdOffset_i16_f64(ptr %a)  {
 define double @LdOffset_i32_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i32_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #31200 // =0x79e0
-; CHECK-NEXT:    movk w8, #63, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldr s0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #1012, lsl #12 // =4145152
+; CHECK-NEXT:    ldr s0, [x8, #14816]
 ; CHECK-NEXT:    ucvtf d0, d0
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i32, ptr %a, i64 1039992
@@ -503,10 +458,8 @@ define double @LdOffset_i32_f64(ptr %a)  {
 define double @LdOffset_i64_f64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i64_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #62400 // =0xf3c0
-; CHECK-NEXT:    movk w8, #126, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    ldr d0, [x0, x8]
+; CHECK-NEXT:    add x8, x0, #2024, lsl #12 // =8290304
+; CHECK-NEXT:    ldr d0, [x8, #29632]
 ; CHECK-NEXT:    scvtf d0, d0
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i64, ptr %a, i64 1039992
@@ -554,7 +507,6 @@ define i32 @LdOffset_i16_odd_offset(ptr nocapture noundef readonly %a)  {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #56953 // =0xde79
 ; CHECK-NEXT:    movk w8, #15, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    ldrsh w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039993
@@ -568,7 +520,6 @@ define i8 @LdOffset_i8_movnwi(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_movnwi:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #16777215 // =0xffffff
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    ldrb w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 16777215
@@ -582,7 +533,6 @@ define i8 @LdOffset_i8_too_large(ptr %a)  {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    movk w8, #256, lsl #16
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    ldrb w0, [x0, x8]
 ; CHECK-NEXT:    ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 16777217
diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index 01e490b260712..aa0b934151fef 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -18,8 +18,6 @@ define void @test_2x8bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #0
 ; CHECK-SVE2p1-SME2:       // %bb.0:
 ; CHECK-SVE2p1-SME2-NEXT:    mov w8, w1
 ; CHECK-SVE2p1-SME2-NEXT:    mov w9, w0
-; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x8 killed $w8
-; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.h, p1.h }, x9, x8
 ; CHECK-SVE2p1-SME2-NEXT:    b use
     %r = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 %i, i32 %n)
@@ -181,7 +179,6 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SVE-NEXT:    cset w8, mi
 ; CHECK-SVE-NEXT:    mov z1.s, p0/z, #1 // =0x1
 ; CHECK-SVE-NEXT:    fmov s0, w8
-; CHECK-SVE-NEXT:    // kill: def $q0 killed $d0
 ; CHECK-SVE-NEXT:    mov v0.s[1], v1.s[1]
 ; CHECK-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-SVE-NEXT:    b use
@@ -192,7 +189,6 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SVE2p1-NEXT:    cset w8, mi
 ; CHECK-SVE2p1-NEXT:    mov z1.s, p0/z, #1 // =0x1
 ; CHECK-SVE2p1-NEXT:    fmov s0, w8
-; CHECK-SVE2p1-NEXT:    // kill: def $q0 killed $d0
 ; CHECK-SVE2p1-NEXT:    mov v0.s[1], v1.s[1]
 ; CHECK-SVE2p1-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-SVE2p1-NEXT:    b use
@@ -263,8 +259,6 @@ define void @test_2x16bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #
 ; CHECK-SVE2p1-SME2:       // %bb.0:
 ; CHECK-SVE2p1-SME2-NEXT:    mov w8, w1
 ; CHECK-SVE2p1-SME2-NEXT:    mov w9, w0
-; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x8 killed $w8
-; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.b, p1.b }, x9, x8
 ; CHECK-SVE2p1-SME2-NEXT:    b use
   %r = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i32(i32 %i, i32 %n)
@@ -297,11 +291,8 @@ define void @test_2x32bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #
 ; CHECK-SVE2p1-SME2-NEXT:    mov w9, w1
 ; CHECK-SVE2p1-SME2-NEXT:    mov w10, w0
 ; CHECK-SVE2p1-SME2-NEXT:    adds w8, w0, w8
-; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x9 killed $w9
-; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x10 killed $w10
 ; CHECK-SVE2p1-SME2-NEXT:    csinv w8, w8, wzr, lo
 ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p0.b, p1.b }, x10, x9
-; CHECK-SVE2p1-SME2-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-SVE2p1-SME2-NEXT:    whilelo { p2.b, p3.b }, x8, x9
 ; CHECK-SVE2p1-SME2-NEXT:    b use
   %r = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i32(i32 %i, i32 %n)
diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index 054f34bcff6fb..49a0086a7be54 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -102,8 +102,7 @@ define i64 @vscale_4096(<vscale x 16 x i8> %a) #1 {
 ; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    umaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w8, s0
-; CHECK-NEXT:    sub w8, w9, w8
-; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    sub w0, w9, w8
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i8(<vscale x 16 x i8> %a, i1 0)
   ret i64 %res
@@ -132,7 +131,6 @@ define i64 @vscale_4096_poison(<vscale x 16 x i8> %a) #1 {
 ; CHECK-NEXT:    umaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    sub w8, w9, w8
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    and x0, x8, #0xffff
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i8(<vscale x 16 x i8> %a, i1 1)
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
index f03804d0064fd..2a77d4dd33fe5 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -28,21 +28,15 @@ define i32 @caller() nounwind ssp {
 ; CHECK-NEXT:    mov w8, #10 ; =0xa
 ; CHECK-NEXT:    mov w9, #9 ; =0x9
 ; CHECK-NEXT:    mov w10, #8 ; =0x8
-; CHECK-NEXT:    ; kill: def $x8 killed $w8
-; CHECK-NEXT:    mov w11, #6 ; =0x6
-; CHECK-NEXT:    ; kill: def $x9 killed $w9
-; CHECK-NEXT:    str x8, [sp, #32]
-; CHECK-NEXT:    ; kill: def $x10 killed $w10
-; CHECK-NEXT:    mov w0, #1 ; =0x1
+; CHECK-NEXT:    stp x9, x8, [sp, #24]
 ; CHECK-NEXT:    mov w8, #7 ; =0x7
-; CHECK-NEXT:    stp x10, x9, [sp, #16]
-; CHECK-NEXT:    mov w9, w11
+; CHECK-NEXT:    mov w9, #6 ; =0x6
+; CHECK-NEXT:    mov w0, #1 ; =0x1
 ; CHECK-NEXT:    mov w1, #2 ; =0x2
 ; CHECK-NEXT:    mov w2, #3 ; =0x3
 ; CHECK-NEXT:    mov w3, #4 ; =0x4
 ; CHECK-NEXT:    mov w4, #5 ; =0x5
 ; CHECK-NEXT:    stp d15, d14, [sp, #48] ; 16-byte Folded Spill
-; CHECK-NEXT:    ; kill: def $x8 killed $w8
 ; CHECK-NEXT:    stp d13, d12, [sp, #64] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp d11, d10, [sp, #80] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp d9, d8, [sp, #96] ; 16-byte Folded Spill
@@ -52,7 +46,8 @@ define i32 @caller() nounwind ssp {
 ; CHECK-NEXT:    stp x22, x21, [sp, #160] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #176] ; 16-byte Folded Spill
 ; CHECK-NEXT:    stp x29, x30, [sp, #192] ; 16-byte Folded Spill
-; CHECK-NEXT:    stp x9, x8, [sp]
+; CHECK-NEXT:    stp x8, x10, [sp, #8]
+; CHECK-NEXT:    str x9, [sp]
 ; CHECK-NEXT:    bl _callee
 ; CHECK-NEXT:    ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
index 68032643bcf4d..08fc47d9480ce 100644
--- a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
+++ b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir
@@ -1,6 +1,5 @@
 # RUN: llc -mtriple=aarch64 -o /dev/null -run-pass=register-coalescer -aarch64-enable-subreg-liveness-tracking -debug-only=regalloc %s 2>&1 | FileCheck %s --check-prefix=CHECK-DBG
 # RUN: llc -mtriple=aarch64 -verify-machineinstrs -o - -run-pass=register-coalescer -aarch64-enable-subreg-liveness-tracking %s | FileCheck %s --check-prefix=CHECK
-# XFAIL: *
 # REQUIRES: asserts
 
 # CHECK-DBG: ********** REGISTER COALESCER **********
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
index f12138b785e85..d40618f2678b6 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
@@ -456,9 +456,8 @@ define void @ldr_with_off_many_var_high(i32 %tile_slice, ptr %ptr, i64 %vnum) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    add w8, w2, #32
 ; CHECK-NEXT:    rdsvl x10, #1
-; CHECK-NEXT:    mov w9, w8
+; CHECK-NEXT:    sxtw x9, w8
 ; CHECK-NEXT:    add w12, w0, w8
-; CHECK-NEXT:    sxtw x9, w9
 ; CHECK-NEXT:    madd x9, x10, x9, x1
 ; CHECK-NEXT:    ldr za[w12, 1], [x9, #1, mul vl]
 ; CHECK-NEXT:    ldr za[w12, 2], [x9, #2, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
index 0443a097f8e07..03c1f28fbaa18 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
@@ -460,9 +460,8 @@ define void @str_with_off_many_var_high(i32 %tile_slice, ptr %ptr, i64 %vnum) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    add w8, w2, #32
 ; CHECK-NEXT:    rdsvl x10, #1
-; CHECK-NEXT:    mov w9, w8
+; CHECK-NEXT:    sxtw x9, w8
 ; CHECK-NEXT:    add w12, w0, w8
-; CHECK-NEXT:    sxtw x9, w9
 ; CHECK-NEXT:    madd x9, x10, x9, x1
 ; CHECK-NEXT:    str za[w12, 1], [x9, #1, mul vl]
 ; CHECK-NEXT:    str za[w12, 2], [x9, #2, mul vl]
diff --git a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
index 20343f2da18c8..895271d8bfdc8 100644
--- a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
+++ b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll
@@ -94,7 +94,6 @@ define void @se_memset(i64 noundef %n) "aarch64_pstate_sm_enabled" nounwind {
 ; CHECK-MOPS-NEXT:    adrp x8, :got:dst
 ; CHECK-MOPS-NEXT:    mov w9, #2 // =0x2
 ; CHECK-MOPS-NEXT:    ldr x8, [x8, :got_lo12:dst]
-; CHECK-MOPS-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-MOPS-NEXT:    setp [x8]!, x0!, x9
 ; CHECK-MOPS-NEXT:    setm [x8]!, x0!, x9
 ; CHECK-MOPS-NEXT:    sete [x8]!, x0!, x9
diff --git a/llvm/test/CodeGen/AArch64/subreg-liveness-fix-subreg-to-reg-implicit-def.mir b/llvm/test/CodeGen/AArch64/subreg-liveness-fix-subreg-to-reg-implicit-def.mir
new file mode 100644
index 0000000000000..32574eb0c25d4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/subreg-liveness-fix-subreg-to-reg-implicit-def.mir
@@ -0,0 +1,107 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -run-pass=aarch64-srlt-define-superregs -enable-subreg-liveness -o - %s | FileCheck %s
+--- |
+  target triple = "aarch64"
+
+  define void @test_implicit_def_w1_to_x1() { entry: unreachable }
+  define void @test_implicit_def_dead_w1_to_dead_x1() { entry: unreachable }
+  define void @test_implicit_def_d0_to_q0_and_d1_to_q1() { entry: unreachable }
+  define void @test_implicit_def_d0_d1_d2_to_q0_q1_q2() { entry: unreachable }
+  define void @test_implicit_def_d0_d1_d2_to_z0_z1_z2_with_sve() "target-features"="+sve" { entry: unreachable }
+
+---
+name: test_implicit_def_w1_to_x1
+isSSA: false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x1
+
+    ; CHECK-LABEL: name: test_implicit_def_w1_to_x1
+    ; CHECK: liveins: $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $x0 = COPY $x1
+    ; CHECK-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1
+    ; CHECK-NEXT: RET_ReallyLR implicit $x1, implicit $x0
+    renamable $x0 = COPY $x1
+    renamable $w1 = ORRWrr $wzr, renamable $w0
+    RET_ReallyLR implicit $x1, implicit $x0
+...
+---
+name: test_implicit_def_dead_w1_to_dead_x1
+isSSA: false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x1
+
+    ; CHECK-LABEL: name: test_implicit_def_dead_w1_to_dead_x1
+    ; CHECK: liveins: $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $x0 = COPY $x1
+    ; CHECK-NEXT: dead renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def dead renamable $x1
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    renamable $x0 = COPY $x1
+    dead renamable $w1 = ORRWrr $wzr, renamable $w0
+    RET_ReallyLR implicit $x0
+...
+---
+name: test_implicit_def_d0_to_q0_and_d1_to_q1
+isSSA: false
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1, $x1
+
+    ; CHECK-LABEL: name: test_implicit_def_d0_to_q0_and_d1_to_q1
+    ; CHECK: liveins: $d0, $d1, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: early-clobber $x1, renamable $d0, renamable $d1 = LDPDpre renamable $x1, 16, implicit-def renamable $q0, implicit-def renamable $q1 :: (load (s64))
+    ; CHECK-NEXT: STPDi renamable $d0, renamable $d1, renamable $x1, 0 :: (store (s64))
+    ; CHECK-NEXT: RET undef $lr
+    early-clobber $x1, renamable $d0, renamable $d1 = LDPDpre renamable $x1, 16 :: (load (s64))
+    STPDi renamable $d0, renamable $d1, renamable $x1, 0 :: (store (s64))
+    RET undef $lr
+...
+---
+name: test_implicit_def_d0_d1_d2_to_q0_q1_q2
+isSSA: false
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1, $lr, $fp
+
+    ; CHECK-LABEL: name: test_implicit_def_d0_d1_d2_to_q0_q1_q2
+    ; CHECK: liveins: $x0, $x1, $lr, $fp
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $x0, renamable $d0_d1_d2 = LD3Threev8b_POST killed renamable $x0, $xzr, implicit-def renamable $q0_q1_q2
+    ; CHECK-NEXT: FAKE_USE $x0
+    ; CHECK-NEXT: FAKE_USE $d0_d1_d2
+    ; CHECK-NEXT: RET undef $lr
+    renamable $x0, renamable $d0_d1_d2 = LD3Threev8b_POST killed renamable $x0, $xzr
+    FAKE_USE $x0
+    FAKE_USE $d0_d1_d2
+    RET undef $lr
+...
+---
+name: test_implicit_def_d0_d1_d2_to_z0_z1_z2_with_sve
+isSSA: false
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1, $lr, $fp
+
+    ; CHECK-LABEL: name: test_implicit_def_d0_d1_d2_to_z0_z1_z2_with_sve
+    ; CHECK: liveins: $x0, $x1, $lr, $fp
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: renamable $x0, renamable $d0_d1_d2 = LD3Threev8b_POST killed renamable $x0, $xzr, implicit-def renamable $z0_z1_z2
+    ; CHECK-NEXT: FAKE_USE $x0
+    ; CHECK-NEXT: FAKE_USE $d0_d1_d2
+    ; CHECK-NEXT: RET undef $lr
+    renamable $x0, renamable $d0_d1_d2 = LD3Threev8b_POST killed renamable $x0, $xzr
+    FAKE_USE $x0
+    FAKE_USE $d0_d1_d2
+    RET undef $lr
+...
diff --git a/llvm/test/CodeGen/AArch64/subreg_to_reg_coalescing_issue.mir b/llvm/test/CodeGen/AArch64/subreg_to_reg_coalescing_issue.mir
index 0d472fba05039..b8fa4a2fef901 100644
--- a/llvm/test/CodeGen/AArch64/subreg_to_reg_coalescing_issue.mir
+++ b/llvm/test/CodeGen/AArch64/subreg_to_reg_coalescing_issue.mir
@@ -14,8 +14,7 @@ body: |
     ; SRLT: liveins: $x1
     ; SRLT-NEXT: {{  $}}
     ; SRLT-NEXT: $x0 = ORRXrr $xzr, $x1
-    ; SRLT-NEXT: renamable $w8 = ORRWrr $wzr, renamable $w0
-    ; SRLT-NEXT: $w1 = ORRWrr $wzr, killed $w8, implicit-def $x1
+    ; SRLT-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1
     ; SRLT-NEXT: RET_ReallyLR implicit $x1, implicit $x0
     ;
     ; NOSRLT-LABEL: name: dont_remove_orr_w
diff --git a/llvm/test/CodeGen/AArch64/sve-bf16-reductions.ll b/llvm/test/CodeGen/AArch64/sve-bf16-reductions.ll
index 649188431f4d8..c1f64892ef895 100644
--- a/llvm/test/CodeGen/AArch64/sve-bf16-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/sve-bf16-reductions.ll
@@ -224,7 +224,6 @@ define float @promoted_fadd(<vscale x 4 x bfloat> %a) {
 ; SVE-NEXT:    ptrue p0.s
 ; SVE-NEXT:    faddv s0, p0, z0.s
 ; SVE-NEXT:    bfcvt h0, s0
-; SVE-NEXT:    // kill: def $d0 killed $h0
 ; SVE-NEXT:    shll v0.4s, v0.4h, #16
 ; SVE-NEXT:    ret
 ;
@@ -234,7 +233,6 @@ define float @promoted_fadd(<vscale x 4 x bfloat> %a) {
 ; SME-NEXT:    ptrue p0.s
 ; SME-NEXT:    faddv s0, p0, z0.s
 ; SME-NEXT:    bfcvt h0, s0
-; SME-NEXT:    // kill: def $s0 killed $h0
 ; SME-NEXT:    fmov w8, s0
 ; SME-NEXT:    lsl w8, w8, #16
 ; SME-NEXT:    fmov s0, w8
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
index 27a380772dd51..058b58454afde 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-contiguous-prefetches.ll
@@ -151,7 +151,6 @@ define void @test_svprf_vnum_over(<vscale x 16 x i1> %pg, ptr %base) {
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    mov w9, #512 // =0x200
 ; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-NEXT:    mul x8, x8, x9
 ; CHECK-NEXT:    prfb pstl3strm, p0, [x0, x8]
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
index b951e0f395893..0ce7f5a645be2 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
@@ -481,7 +481,6 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    mov w9, #512 // =0x200
 ; CHECK-NEXT:    lsr x8, x8, #4
-; CHECK-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-NEXT:    mul x8, x8, x9
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
index c60d69b9fb5a4..1c475bf85f704 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
@@ -82,7 +82,6 @@ define <vscale x 2 x i1> @whilele_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
 ; CHECK-LABEL: whilele_d_ii_dont_fold_to_ptrue_larger_than_minvec:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #3 // =0x3
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilele p0.d, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 0, i64 3)
@@ -102,7 +101,6 @@ define <vscale x 16 x i1> @whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 ; CHECK-LABEL: whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #9 // =0x9
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilele p0.b, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 9)
@@ -219,7 +217,6 @@ define <vscale x 2 x i1> @whilelo_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
 ; CHECK-LABEL: whilelo_d_ii_dont_fold_to_ptrue_larger_than_minvec:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #3 // =0x3
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 0, i64 3)
@@ -239,7 +236,6 @@ define <vscale x 16 x i1> @whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 ; CHECK-LABEL: whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #9 // =0x9
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelo p0.b, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 9)
@@ -346,7 +342,6 @@ define <vscale x 2 x i1> @whilels_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
 ; CHECK-LABEL: whilels_d_ii_dont_fold_to_ptrue_larger_than_minvec:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #3 // =0x3
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilels p0.d, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 0, i64 3)
@@ -366,7 +361,6 @@ define <vscale x 16 x i1> @whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 ; CHECK-LABEL: whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #9 // =0x9
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilels p0.b, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 9)
@@ -482,7 +476,6 @@ define <vscale x 2 x i1> @whilelt_d_ii_dont_fold_to_ptrue_larger_than_minvec() {
 ; CHECK-LABEL: whilelt_d_ii_dont_fold_to_ptrue_larger_than_minvec:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #3 // =0x3
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelt p0.d, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 3)
@@ -502,7 +495,6 @@ define <vscale x 16 x i1> @whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9() {
 ; CHECK-LABEL: whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #9 // =0x9
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    whilelt p0.b, xzr, x8
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 9)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index dc17513daf330..f6c274ed1eaa8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -130,14 +130,11 @@ define <16 x i32> @load_sext_v16i8i32(ptr %ap)  {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    mov w8, #4 // =0x4
 ; CHECK-NEXT:    mov w9, #8 // =0x8
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    mov w10, #12 // =0xc
-; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, x8]
-; CHECK-NEXT:    mov w8, w9
 ; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
-; CHECK-NEXT:    ld1sb { z2.s }, p0/z, [x0, x8]
-; CHECK-NEXT:    mov w8, w10
-; CHECK-NEXT:    ld1sb { z3.s }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, x8]
+; CHECK-NEXT:    ld1sb { z2.s }, p0/z, [x0, x9]
+; CHECK-NEXT:    ld1sb { z3.s }, p0/z, [x0, x10]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_sext_v16i8i32:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index 0a9baf7acd974..c1a5a521cde5f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -13,8 +13,6 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ldr h0, [x1]
 ; SVE-NEXT:    ldr h1, [x0]
-; SVE-NEXT:    // kill: def $z0 killed $h0
-; SVE-NEXT:    // kill: def $z1 killed $h1
 ; SVE-NEXT:    and z0.h, z0.h, #0x8000
 ; SVE-NEXT:    and z1.h, z1.h, #0x7fff
 ; SVE-NEXT:    orr z0.d, z1.d, z0.d
@@ -24,12 +22,10 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {
 ; SVE2-LABEL: test_copysign_f16:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
-; SVE2-NEXT:    ldr h1, [x0]
-; SVE2-NEXT:    ldr h2, [x1]
-; SVE2-NEXT:    // kill: def $z1 killed $h1
-; SVE2-NEXT:    // kill: def $z2 killed $h2
-; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
-; SVE2-NEXT:    str h1, [x0]
+; SVE2-NEXT:    ldr h1, [x1]
+; SVE2-NEXT:    ldr h2, [x0]
+; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT:    str h2, [x0]
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_copysign_f16:
@@ -61,8 +57,6 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ldr h0, [x1]
 ; SVE-NEXT:    ldr h1, [x0]
-; SVE-NEXT:    // kill: def $z0 killed $h0
-; SVE-NEXT:    // kill: def $z1 killed $h1
 ; SVE-NEXT:    and z0.h, z0.h, #0x8000
 ; SVE-NEXT:    and z1.h, z1.h, #0x7fff
 ; SVE-NEXT:    orr z0.d, z1.d, z0.d
@@ -72,12 +66,10 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
 ; SVE2-LABEL: test_copysign_bf16:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z0.h, #32767 // =0x7fff
-; SVE2-NEXT:    ldr h1, [x0]
-; SVE2-NEXT:    ldr h2, [x1]
-; SVE2-NEXT:    // kill: def $z1 killed $h1
-; SVE2-NEXT:    // kill: def $z2 killed $h2
-; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
-; SVE2-NEXT:    str h1, [x0]
+; SVE2-NEXT:    ldr h1, [x1]
+; SVE2-NEXT:    ldr h2, [x0]
+; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT:    str h2, [x0]
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_copysign_bf16:
@@ -86,7 +78,6 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    ldr h0, [x0]
 ; NONEON-NOSVE-NEXT:    ldr h1, [x1]
-; NONEON-NOSVE-NEXT:    // kill: def $s0 killed $h0
 ; NONEON-NOSVE-NEXT:    fmov w8, s0
 ; NONEON-NOSVE-NEXT:    str h1, [sp, #12]
 ; NONEON-NOSVE-NEXT:    lsl w8, w8, #16
@@ -114,8 +105,6 @@ define void @test_copysign_f32(ptr %ap, ptr %bp) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ldr s0, [x1]
 ; SVE-NEXT:    ldr s1, [x0]
-; SVE-NEXT:    // kill: def $z0 killed $s0
-; SVE-NEXT:    // kill: def $z1 killed $s1
 ; SVE-NEXT:    and z0.s, z0.s, #0x80000000
 ; SVE-NEXT:    and z1.s, z1.s, #0x7fffffff
 ; SVE-NEXT:    orr z0.d, z1.d, z0.d
@@ -125,12 +114,10 @@ define void @test_copysign_f32(ptr %ap, ptr %bp) {
 ; SVE2-LABEL: test_copysign_f32:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z0.s, #0x7fffffff
-; SVE2-NEXT:    ldr s1, [x0]
-; SVE2-NEXT:    ldr s2, [x1]
-; SVE2-NEXT:    // kill: def $z1 killed $s1
-; SVE2-NEXT:    // kill: def $z2 killed $s2
-; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
-; SVE2-NEXT:    str s1, [x0]
+; SVE2-NEXT:    ldr s1, [x1]
+; SVE2-NEXT:    ldr s2, [x0]
+; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT:    str s2, [x0]
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_copysign_f32:
@@ -155,8 +142,6 @@ define void @test_copysign_f64(ptr %ap, ptr %bp) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ldr d0, [x1]
 ; SVE-NEXT:    ldr d1, [x0]
-; SVE-NEXT:    // kill: def $z0 killed $d0
-; SVE-NEXT:    // kill: def $z1 killed $d1
 ; SVE-NEXT:    and z0.d, z0.d, #0x8000000000000000
 ; SVE-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
 ; SVE-NEXT:    orr z0.d, z1.d, z0.d
@@ -166,12 +151,10 @@ define void @test_copysign_f64(ptr %ap, ptr %bp) {
 ; SVE2-LABEL: test_copysign_f64:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z0.d, #0x7fffffffffffffff
-; SVE2-NEXT:    ldr d1, [x0]
-; SVE2-NEXT:    ldr d2, [x1]
-; SVE2-NEXT:    // kill: def $z1 killed $d1
-; SVE2-NEXT:    // kill: def $z2 killed $d2
-; SVE2-NEXT:    bsl z1.d, z1.d, z2.d, z0.d
-; SVE2-NEXT:    str d1, [x0]
+; SVE2-NEXT:    ldr d1, [x1]
+; SVE2-NEXT:    ldr d2, [x0]
+; SVE2-NEXT:    bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT:    str d2, [x0]
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_copysign_f64:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index 49b207016348d..a08b71ce83ec1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -508,14 +508,12 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1>
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
 ; NONEON-NOSVE-NEXT:    str d2, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #8]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #16]
-; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #1
 ; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
-; NONEON-NOSVE-NEXT:    cmp x8, #0
-; NONEON-NOSVE-NEXT:    mov w8, w9
 ; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #1
+; NONEON-NOSVE-NEXT:    cmp x8, #0
+; NONEON-NOSVE-NEXT:    sbfx x8, x9, #0, #1
 ; NONEON-NOSVE-NEXT:    fcsel d3, d2, d0, ne
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    cmp x8, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 8fa3c00f506a6..7b99ffd79666f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -1798,15 +1798,12 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
 ; NONEON-NOSVE-NEXT:    add w9, w10, w9, lsr #1
 ; NONEON-NOSVE-NEXT:    lsr w11, w9, #6
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
@@ -1814,14 +1811,12 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; NONEON-NOSVE-NEXT:    lsr w9, w9, #6
 ; NONEON-NOSVE-NEXT:    stp w9, w11, [sp, #56]
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
 ; NONEON-NOSVE-NEXT:    add w9, w10, w9, lsr #1
 ; NONEON-NOSVE-NEXT:    lsr w11, w9, #6
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
@@ -1829,14 +1824,12 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; NONEON-NOSVE-NEXT:    lsr w9, w9, #6
 ; NONEON-NOSVE-NEXT:    stp w9, w11, [sp, #48]
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
 ; NONEON-NOSVE-NEXT:    add w9, w10, w9, lsr #1
 ; NONEON-NOSVE-NEXT:    lsr w11, w9, #6
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
@@ -1844,14 +1837,12 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; NONEON-NOSVE-NEXT:    lsr w9, w9, #6
 ; NONEON-NOSVE-NEXT:    stp w9, w11, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #4]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x10, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
 ; NONEON-NOSVE-NEXT:    add w9, w10, w9, lsr #1
 ; NONEON-NOSVE-NEXT:    lsr w10, w9, #6
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    umull x8, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w8
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index 9ab036ab4757d..9789d7f564a27 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -92,16 +92,12 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
 ; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp]
 ; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    ldp w11, w10, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #3
-; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
 ; NONEON-NOSVE-NEXT:    sbfx x9, x9, #0, #3
 ; NONEON-NOSVE-NEXT:    sbfx x10, x10, #0, #3
 ; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #48]
-; NONEON-NOSVE-NEXT:    mov w8, w11
-; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #3
+; NONEON-NOSVE-NEXT:    sbfx x8, x11, #0, #3
 ; NONEON-NOSVE-NEXT:    stp x8, x10, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x0]
@@ -928,157 +924,125 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    .cfi_offset w29, -96
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #16]
-; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #17]
-; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #18]
-; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #19]
-; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #17]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #47]
-; NONEON-NOSVE-NEXT:    add w0, w14, w14
-; NONEON-NOSVE-NEXT:    add w7, w16, w16
-; NONEON-NOSVE-NEXT:    add w16, w17, w17
-; NONEON-NOSVE-NEXT:    add w17, w18, w18
-; NONEON-NOSVE-NEXT:    mov w18, w0
-; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
-; NONEON-NOSVE-NEXT:    sxtb x19, w18
-; NONEON-NOSVE-NEXT:    // kill: def $x17 killed $w17
-; NONEON-NOSVE-NEXT:    sxtb x20, w17
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #18]
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #19]
+; NONEON-NOSVE-NEXT:    add w16, w16, w16
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
 ; NONEON-NOSVE-NEXT:    ldrb w30, [sp, #21]
+; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
+; NONEON-NOSVE-NEXT:    sxtb x19, w17
+; NONEON-NOSVE-NEXT:    sxtb x20, w16
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    mov w18, w16
-; NONEON-NOSVE-NEXT:    // kill: def $x7 killed $w7
+; NONEON-NOSVE-NEXT:    add w7, w14, w14
+; NONEON-NOSVE-NEXT:    add w18, w15, w15
+; NONEON-NOSVE-NEXT:    sxtb x21, w18
 ; NONEON-NOSVE-NEXT:    ldrb w29, [sp, #22]
+; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #23]
 ; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #160]
 ; NONEON-NOSVE-NEXT:    add w19, w30, w30
-; NONEON-NOSVE-NEXT:    sxtb x21, w18
-; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #23]
 ; NONEON-NOSVE-NEXT:    sxtb x7, w7
-; NONEON-NOSVE-NEXT:    // kill: def $x19 killed $w19
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
+; NONEON-NOSVE-NEXT:    add w8, w8, w8
 ; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldrb w28, [sp, #25]
-; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #26]
 ; NONEON-NOSVE-NEXT:    sxtb x19, w19
 ; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    stp x7, x21, [sp, #144]
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
 ; NONEON-NOSVE-NEXT:    add w7, w29, w29
+; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #27]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
+; NONEON-NOSVE-NEXT:    sxtb x9, w9
 ; NONEON-NOSVE-NEXT:    stp x8, x19, [sp, #128]
 ; NONEON-NOSVE-NEXT:    add w19, w28, w28
-; NONEON-NOSVE-NEXT:    // kill: def $x7 killed $w7
-; NONEON-NOSVE-NEXT:    add w8, w27, w27
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
 ; NONEON-NOSVE-NEXT:    sxtb x7, w7
-; NONEON-NOSVE-NEXT:    // kill: def $x19 killed $w19
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
-; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
+; NONEON-NOSVE-NEXT:    add w8, w27, w27
 ; NONEON-NOSVE-NEXT:    sxtb x19, w19
 ; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #29]
 ; NONEON-NOSVE-NEXT:    stp x7, x9, [sp, #112]
 ; NONEON-NOSVE-NEXT:    add w9, w26, w26
 ; NONEON-NOSVE-NEXT:    add w7, w25, w25
 ; NONEON-NOSVE-NEXT:    stp x8, x19, [sp, #96]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
-; NONEON-NOSVE-NEXT:    mov w8, w7
-; NONEON-NOSVE-NEXT:    add w19, w24, w24
 ; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
-; NONEON-NOSVE-NEXT:    add w7, w23, w23
+; NONEON-NOSVE-NEXT:    sxtb x8, w7
 ; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #31]
+; NONEON-NOSVE-NEXT:    add w19, w24, w24
+; NONEON-NOSVE-NEXT:    add w7, w23, w23
 ; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #80]
-; NONEON-NOSVE-NEXT:    mov w9, w19
-; NONEON-NOSVE-NEXT:    mov w8, w7
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    sxtb x9, w19
+; NONEON-NOSVE-NEXT:    sxtb x8, w7
+; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #33]
 ; NONEON-NOSVE-NEXT:    add w19, w22, w22
 ; NONEON-NOSVE-NEXT:    add w6, w6, w6
-; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #33]
-; NONEON-NOSVE-NEXT:    add w4, w4, w4
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #64]
-; NONEON-NOSVE-NEXT:    mov w9, w19
-; NONEON-NOSVE-NEXT:    mov w8, w6
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
-; NONEON-NOSVE-NEXT:    add w5, w5, w5
+; NONEON-NOSVE-NEXT:    sxtb x9, w19
+; NONEON-NOSVE-NEXT:    sxtb x8, w6
 ; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #35]
-; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #36]
+; NONEON-NOSVE-NEXT:    add w5, w5, w5
+; NONEON-NOSVE-NEXT:    add w4, w4, w4
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #48]
-; NONEON-NOSVE-NEXT:    mov w9, w5
-; NONEON-NOSVE-NEXT:    mov w8, w4
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    sxtb x9, w5
+; NONEON-NOSVE-NEXT:    sxtb x8, w4
+; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #36]
+; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #37]
 ; NONEON-NOSVE-NEXT:    add w3, w3, w3
 ; NONEON-NOSVE-NEXT:    add w2, w2, w2
-; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #37]
-; NONEON-NOSVE-NEXT:    add w18, w18, w18
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #288]
-; NONEON-NOSVE-NEXT:    mov w9, w3
-; NONEON-NOSVE-NEXT:    mov w8, w2
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
-; NONEON-NOSVE-NEXT:    add w0, w0, w0
 ; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #38]
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #288]
+; NONEON-NOSVE-NEXT:    sxtb x9, w3
+; NONEON-NOSVE-NEXT:    sxtb x8, w2
 ; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #39]
-; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #40]
+; NONEON-NOSVE-NEXT:    add w0, w0, w0
+; NONEON-NOSVE-NEXT:    add w18, w18, w18
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #272]
-; NONEON-NOSVE-NEXT:    mov w9, w0
-; NONEON-NOSVE-NEXT:    mov w8, w18
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    sxtb x9, w0
+; NONEON-NOSVE-NEXT:    sxtb x8, w18
+; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #40]
+; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #41]
 ; NONEON-NOSVE-NEXT:    add w17, w17, w17
 ; NONEON-NOSVE-NEXT:    add w16, w16, w16
-; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #41]
-; NONEON-NOSVE-NEXT:    add w14, w14, w14
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #256]
-; NONEON-NOSVE-NEXT:    mov w9, w17
-; NONEON-NOSVE-NEXT:    mov w8, w16
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
-; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    sxtb x9, w17
+; NONEON-NOSVE-NEXT:    sxtb x8, w16
 ; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #43]
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
+; NONEON-NOSVE-NEXT:    add w14, w14, w14
 ; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #44]
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #240]
-; NONEON-NOSVE-NEXT:    mov w9, w15
-; NONEON-NOSVE-NEXT:    mov w8, w14
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    sxtb x9, w15
+; NONEON-NOSVE-NEXT:    sxtb x8, w14
+; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #45]
 ; NONEON-NOSVE-NEXT:    add w13, w13, w13
 ; NONEON-NOSVE-NEXT:    add w12, w12, w12
-; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #45]
-; NONEON-NOSVE-NEXT:    add w10, w10, w10
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #224]
-; NONEON-NOSVE-NEXT:    mov w9, w13
-; NONEON-NOSVE-NEXT:    mov w8, w12
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    sxtb x9, w13
+; NONEON-NOSVE-NEXT:    sxtb x8, w12
 ; NONEON-NOSVE-NEXT:    add w11, w11, w11
-; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #144]
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
 ; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #208]
-; NONEON-NOSVE-NEXT:    mov w9, w11
+; NONEON-NOSVE-NEXT:    sxtb x9, w11
 ; NONEON-NOSVE-NEXT:    ldr w11, [sp, #8] // 4-byte Reload
-; NONEON-NOSVE-NEXT:    mov w8, w10
+; NONEON-NOSVE-NEXT:    sxtb x8, w10
 ; NONEON-NOSVE-NEXT:    ldr w10, [sp, #12] // 4-byte Reload
-; NONEON-NOSVE-NEXT:    sxtb x9, w9
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
 ; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #144]
 ; NONEON-NOSVE-NEXT:    add w10, w10, w10
-; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #112]
-; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #192]
-; NONEON-NOSVE-NEXT:    mov w8, w10
 ; NONEON-NOSVE-NEXT:    sxtb x11, w11
-; NONEON-NOSVE-NEXT:    sxtb x8, w8
+; NONEON-NOSVE-NEXT:    stp x8, x9, [sp, #192]
+; NONEON-NOSVE-NEXT:    sxtb x8, w10
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #112]
 ; NONEON-NOSVE-NEXT:    ldp q5, q4, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #48]
 ; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #176]
+; NONEON-NOSVE-NEXT:    ldp q7, q6, [sp, #48]
 ; NONEON-NOSVE-NEXT:    ldp q17, q16, [sp, #272]
 ; NONEON-NOSVE-NEXT:    ldp q18, q21, [sp, #176]
 ; NONEON-NOSVE-NEXT:    ldp q20, q19, [sp, #240]
@@ -1363,76 +1327,60 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #2]
-; NONEON-NOSVE-NEXT:    ldrh w16, [sp]
-; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #6]
+; NONEON-NOSVE-NEXT:    ldrh w13, [sp]
+; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #2]
+; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #6]
 ; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #4]
 ; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrh w5, [sp, #10]
 ; NONEON-NOSVE-NEXT:    add w13, w13, w13
 ; NONEON-NOSVE-NEXT:    add w16, w16, w16
-; NONEON-NOSVE-NEXT:    add w15, w15, w15
-; NONEON-NOSVE-NEXT:    // kill: def $x13 killed $w13
-; NONEON-NOSVE-NEXT:    // kill: def $x16 killed $w16
+; NONEON-NOSVE-NEXT:    add w12, w12, w12
+; NONEON-NOSVE-NEXT:    sxth x16, w16
+; NONEON-NOSVE-NEXT:    sxth x13, w13
 ; NONEON-NOSVE-NEXT:    add w3, w3, w3
-; NONEON-NOSVE-NEXT:    // kill: def $x15 killed $w15
+; NONEON-NOSVE-NEXT:    sxth x12, w12
 ; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #14]
-; NONEON-NOSVE-NEXT:    sxth x13, w13
-; NONEON-NOSVE-NEXT:    sxth x16, w16
-; NONEON-NOSVE-NEXT:    sxth x15, w15
-; NONEON-NOSVE-NEXT:    // kill: def $x3 killed $w3
-; NONEON-NOSVE-NEXT:    add w0, w0, w0
+; NONEON-NOSVE-NEXT:    stp x13, x16, [sp, #80]
+; NONEON-NOSVE-NEXT:    sxth x13, w3
+; NONEON-NOSVE-NEXT:    add w16, w5, w5
+; NONEON-NOSVE-NEXT:    add w3, w4, w4
 ; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #18]
-; NONEON-NOSVE-NEXT:    stp x16, x13, [sp, #80]
-; NONEON-NOSVE-NEXT:    sxth x16, w3
-; NONEON-NOSVE-NEXT:    add w13, w5, w5
-; NONEON-NOSVE-NEXT:    add w3, w4, w4
-; NONEON-NOSVE-NEXT:    // kill: def $x13 killed $w13
-; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #64]
+; NONEON-NOSVE-NEXT:    sxth x12, w16
+; NONEON-NOSVE-NEXT:    sxth x13, w3
+; NONEON-NOSVE-NEXT:    add w16, w2, w2
+; NONEON-NOSVE-NEXT:    add w0, w0, w0
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    stp x16, x15, [sp, #64]
-; NONEON-NOSVE-NEXT:    mov w16, w3
-; NONEON-NOSVE-NEXT:    sxth x13, w13
-; NONEON-NOSVE-NEXT:    sxth x16, w16
-; NONEON-NOSVE-NEXT:    add w15, w2, w2
+; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #48]
+; NONEON-NOSVE-NEXT:    sxth x12, w16
+; NONEON-NOSVE-NEXT:    sxth x13, w0
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #26]
-; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #20]
-; NONEON-NOSVE-NEXT:    stp x16, x13, [sp, #48]
-; NONEON-NOSVE-NEXT:    mov w13, w15
-; NONEON-NOSVE-NEXT:    mov w16, w0
-; NONEON-NOSVE-NEXT:    sxth x13, w13
-; NONEON-NOSVE-NEXT:    sxth x16, w16
-; NONEON-NOSVE-NEXT:    add w15, w18, w18
-; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #22]
-; NONEON-NOSVE-NEXT:    add w12, w12, w12
-; NONEON-NOSVE-NEXT:    add w11, w11, w11
-; NONEON-NOSVE-NEXT:    stp x16, x13, [sp, #32]
-; NONEON-NOSVE-NEXT:    mov w13, w15
-; NONEON-NOSVE-NEXT:    mov w15, w17
-; NONEON-NOSVE-NEXT:    sxth x13, w13
-; NONEON-NOSVE-NEXT:    sxth x15, w15
+; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #20]
+; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #22]
+; NONEON-NOSVE-NEXT:    add w16, w18, w18
+; NONEON-NOSVE-NEXT:    add w17, w17, w17
+; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #32]
+; NONEON-NOSVE-NEXT:    sxth x12, w16
+; NONEON-NOSVE-NEXT:    sxth x13, w17
+; NONEON-NOSVE-NEXT:    add w15, w15, w15
 ; NONEON-NOSVE-NEXT:    add w14, w14, w14
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
 ; NONEON-NOSVE-NEXT:    add w10, w10, w10
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
-; NONEON-NOSVE-NEXT:    stp x15, x13, [sp, #144]
-; NONEON-NOSVE-NEXT:    mov w13, w14
-; NONEON-NOSVE-NEXT:    // kill: def $x12 killed $w12
-; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
-; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
-; NONEON-NOSVE-NEXT:    sxth x13, w13
-; NONEON-NOSVE-NEXT:    sxth x12, w12
+; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #144]
+; NONEON-NOSVE-NEXT:    sxth x12, w15
+; NONEON-NOSVE-NEXT:    sxth x13, w14
 ; NONEON-NOSVE-NEXT:    sxth x11, w11
 ; NONEON-NOSVE-NEXT:    sxth x10, w10
 ; NONEON-NOSVE-NEXT:    sxth x9, w9
 ; NONEON-NOSVE-NEXT:    sxth x8, w8
-; NONEON-NOSVE-NEXT:    stp x12, x13, [sp, #128]
+; NONEON-NOSVE-NEXT:    stp x13, x12, [sp, #128]
 ; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #64]
 ; NONEON-NOSVE-NEXT:    stp x10, x11, [sp, #112]
 ; NONEON-NOSVE-NEXT:    ldp q4, q3, [sp, #32]
@@ -1513,34 +1461,26 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
 ; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #24]
 ; NONEON-NOSVE-NEXT:    add w8, w8, w8
 ; NONEON-NOSVE-NEXT:    add w9, w9, w9
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
-; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #8]
 ; NONEON-NOSVE-NEXT:    sxtw x8, w8
 ; NONEON-NOSVE-NEXT:    sxtw x9, w9
-; NONEON-NOSVE-NEXT:    add w11, w11, w11
-; NONEON-NOSVE-NEXT:    add w10, w10, w10
-; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
+; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #8]
 ; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #48]
 ; NONEON-NOSVE-NEXT:    add w8, w15, w15
 ; NONEON-NOSVE-NEXT:    add w9, w14, w14
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
-; NONEON-NOSVE-NEXT:    add w13, w13, w13
-; NONEON-NOSVE-NEXT:    add w12, w12, w12
-; NONEON-NOSVE-NEXT:    sxtw x11, w11
 ; NONEON-NOSVE-NEXT:    sxtw x8, w8
 ; NONEON-NOSVE-NEXT:    sxtw x9, w9
-; NONEON-NOSVE-NEXT:    // kill: def $x13 killed $w13
-; NONEON-NOSVE-NEXT:    // kill: def $x12 killed $w12
+; NONEON-NOSVE-NEXT:    add w11, w11, w11
+; NONEON-NOSVE-NEXT:    add w13, w13, w13
+; NONEON-NOSVE-NEXT:    add w12, w12, w12
+; NONEON-NOSVE-NEXT:    add w10, w10, w10
 ; NONEON-NOSVE-NEXT:    sxtw x13, w13
 ; NONEON-NOSVE-NEXT:    sxtw x12, w12
+; NONEON-NOSVE-NEXT:    sxtw x11, w11
 ; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #80]
-; NONEON-NOSVE-NEXT:    mov w8, w10
-; NONEON-NOSVE-NEXT:    sxtw x8, w8
+; NONEON-NOSVE-NEXT:    sxtw x8, w10
 ; NONEON-NOSVE-NEXT:    stp x12, x13, [sp, #32]
-; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp x8, x11, [sp, #64]
+; NONEON-NOSVE-NEXT:    ldp q3, q2, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    stp q2, q3, [x1]
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [x1, #32]
@@ -3009,7 +2949,6 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
 ; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
 ; NONEON-NOSVE-NEXT:    mov w9, w0
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
 ; NONEON-NOSVE-NEXT:    mul x10, x9, x8
 ; NONEON-NOSVE-NEXT:    ldr x8, [sp]
 ; NONEON-NOSVE-NEXT:    mul x8, x9, x8
@@ -3030,7 +2969,6 @@ define void @extend_no_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; CHECK-LABEL: extend_no_mul:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, w0
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    mov z0.d, x8
 ; CHECK-NEXT:    str q0, [x1]
 ; CHECK-NEXT:    ret
@@ -3038,7 +2976,6 @@ define void @extend_no_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; NONEON-NOSVE-LABEL: extend_no_mul:
 ; NONEON-NOSVE:       // %bb.0: // %entry
 ; NONEON-NOSVE-NEXT:    mov w8, w0
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    stp x8, x8, [sp, #-16]!
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
index af1fb3576e110..951bc8b93c595 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
@@ -2016,14 +2016,10 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #32
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
-; NONEON-NOSVE-NEXT:    ldp w11, w10, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #8]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
-; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
+; NONEON-NOSVE-NEXT:    ldp w11, w10, [sp, #16]
 ; NONEON-NOSVE-NEXT:    umull x9, w9, w10
-; NONEON-NOSVE-NEXT:    mov w10, w11
-; NONEON-NOSVE-NEXT:    umull x8, w8, w10
+; NONEON-NOSVE-NEXT:    umull x8, w8, w11
 ; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
 ; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24]
@@ -2058,27 +2054,19 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #32]
-; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
 ; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
-; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    ldp w13, w12, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x12 killed $w12
 ; NONEON-NOSVE-NEXT:    umull x11, w11, w12
-; NONEON-NOSVE-NEXT:    // kill: def $x13 killed $w13
 ; NONEON-NOSVE-NEXT:    ldp w14, w12, [sp, #56]
-; NONEON-NOSVE-NEXT:    // kill: def $x12 killed $w12
-; NONEON-NOSVE-NEXT:    umull x9, w9, w12
 ; NONEON-NOSVE-NEXT:    umull x10, w10, w13
 ; NONEON-NOSVE-NEXT:    lsr x11, x11, #32
-; NONEON-NOSVE-NEXT:    mov w12, w14
-; NONEON-NOSVE-NEXT:    umull x8, w8, w12
-; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
+; NONEON-NOSVE-NEXT:    umull x9, w9, w12
+; NONEON-NOSVE-NEXT:    umull x8, w8, w14
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
-; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
+; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
 ; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #72]
+; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
 ; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #80
@@ -2128,46 +2116,30 @@ define void @umulh_v8i32(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldp w14, w15, [sp, #96]
-; NONEON-NOSVE-NEXT:    // kill: def $x14 killed $w14
-; NONEON-NOSVE-NEXT:    // kill: def $x15 killed $w15
 ; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #104]
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #64]
-; NONEON-NOSVE-NEXT:    // kill: def $x13 killed $w13
-; NONEON-NOSVE-NEXT:    // kill: def $x12 killed $w12
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
-; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
-; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
 ; NONEON-NOSVE-NEXT:    ldp w17, w16, [sp, #112]
-; NONEON-NOSVE-NEXT:    // kill: def $x16 killed $w16
-; NONEON-NOSVE-NEXT:    // kill: def $x17 killed $w17
 ; NONEON-NOSVE-NEXT:    umull x15, w15, w16
-; NONEON-NOSVE-NEXT:    ldp w16, w2, [sp, #88]
+; NONEON-NOSVE-NEXT:    ldp w18, w16, [sp, #120]
 ; NONEON-NOSVE-NEXT:    umull x14, w14, w17
-; NONEON-NOSVE-NEXT:    // kill: def $x16 killed $w16
-; NONEON-NOSVE-NEXT:    ldp w17, w3, [sp, #80]
-; NONEON-NOSVE-NEXT:    ldp w18, w1, [sp, #120]
-; NONEON-NOSVE-NEXT:    // kill: def $x1 killed $w1
-; NONEON-NOSVE-NEXT:    // kill: def $x18 killed $w18
-; NONEON-NOSVE-NEXT:    umull x13, w13, w1
-; NONEON-NOSVE-NEXT:    // kill: def $x17 killed $w17
+; NONEON-NOSVE-NEXT:    ldp w17, w1, [sp, #80]
+; NONEON-NOSVE-NEXT:    umull x13, w13, w16
 ; NONEON-NOSVE-NEXT:    lsr x15, x15, #32
-; NONEON-NOSVE-NEXT:    lsr x14, x14, #32
 ; NONEON-NOSVE-NEXT:    umull x12, w12, w18
-; NONEON-NOSVE-NEXT:    mov w18, w2
-; NONEON-NOSVE-NEXT:    mov w1, w3
+; NONEON-NOSVE-NEXT:    lsr x14, x14, #32
+; NONEON-NOSVE-NEXT:    ldp w16, w18, [sp, #88]
 ; NONEON-NOSVE-NEXT:    umull x11, w11, w1
+; NONEON-NOSVE-NEXT:    lsr x13, x13, #32
 ; NONEON-NOSVE-NEXT:    stp w14, w15, [sp, #152]
 ; NONEON-NOSVE-NEXT:    umull x10, w10, w17
-; NONEON-NOSVE-NEXT:    lsr x13, x13, #32
-; NONEON-NOSVE-NEXT:    umull x9, w9, w18
 ; NONEON-NOSVE-NEXT:    lsr x12, x12, #32
+; NONEON-NOSVE-NEXT:    umull x9, w9, w18
 ; NONEON-NOSVE-NEXT:    umull x8, w8, w16
 ; NONEON-NOSVE-NEXT:    lsr x11, x11, #32
-; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    stp w12, w13, [sp, #144]
+; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
 ; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
 ; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #136]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
index cf3e10a2217ae..5d8466e31abff 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
@@ -26,20 +26,16 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
 ; NONEON-NOSVE-NEXT:    ldrsb w13, [sp, #10]
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrsb w14, [sp, #8]
 ; NONEON-NOSVE-NEXT:    asr w10, w11, w10
-; NONEON-NOSVE-NEXT:    mov w11, w12
+; NONEON-NOSVE-NEXT:    asr w11, w13, w12
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    mov w8, w9
-; NONEON-NOSVE-NEXT:    asr w11, w13, w11
-; NONEON-NOSVE-NEXT:    asr w8, w14, w8
+; NONEON-NOSVE-NEXT:    asr w8, w14, w9
 ; NONEON-NOSVE-NEXT:    strh w10, [sp, #28]
 ; NONEON-NOSVE-NEXT:    strh w11, [sp, #26]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
@@ -64,42 +60,34 @@ define <8 x i8> @ashr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -122,82 +110,66 @@ define <16 x i8> @ashr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -228,162 +200,130 @@ define void @ashr_v32i8(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #63]
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #47]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #46]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #95]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #62]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #45]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #61]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #93]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #43]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #59]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #91]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #41]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #57]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #89]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #39]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #55]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #87]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #37]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #53]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #85]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #35]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #51]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #83]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #33]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #49]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #81]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #79]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #77]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #75]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #73]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #71]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #69]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #67]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #65]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -415,10 +355,8 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrsh w11, [sp, #8]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
-; NONEON-NOSVE-NEXT:    mov w9, w10
-; NONEON-NOSVE-NEXT:    asr w9, w11, w9
+; NONEON-NOSVE-NEXT:    asr w9, w11, w10
 ; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #32
@@ -441,22 +379,18 @@ define <4 x i16> @ashr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -479,42 +413,34 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -545,82 +471,66 @@ define void @ashr_v16i16(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #46]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrsh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -648,10 +558,8 @@ define <2 x i32> @ashr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -674,18 +582,14 @@ define <4 x i32> @ashr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -716,34 +620,26 @@ define void @ashr_v8i32(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    asr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -866,20 +762,16 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
 ; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #10]
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #8]
 ; NONEON-NOSVE-NEXT:    lsr w10, w11, w10
-; NONEON-NOSVE-NEXT:    mov w11, w12
+; NONEON-NOSVE-NEXT:    lsr w11, w13, w12
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    mov w8, w9
-; NONEON-NOSVE-NEXT:    lsr w11, w13, w11
-; NONEON-NOSVE-NEXT:    lsr w8, w14, w8
+; NONEON-NOSVE-NEXT:    lsr w8, w14, w9
 ; NONEON-NOSVE-NEXT:    strh w10, [sp, #28]
 ; NONEON-NOSVE-NEXT:    strh w11, [sp, #26]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
@@ -904,42 +796,34 @@ define <8 x i8> @lshr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -962,82 +846,66 @@ define <16 x i8> @lshr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -1068,162 +936,130 @@ define void @lshr_v32i8(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #63]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #47]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #95]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #62]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #45]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #61]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #93]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #43]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #59]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #91]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #41]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #57]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #89]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #39]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #55]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #87]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #37]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #53]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #85]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #35]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #51]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #83]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #33]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #49]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #81]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #79]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #77]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #75]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #73]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #71]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #69]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #67]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #65]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -1255,10 +1091,8 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #8]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
-; NONEON-NOSVE-NEXT:    mov w9, w10
-; NONEON-NOSVE-NEXT:    lsr w9, w11, w9
+; NONEON-NOSVE-NEXT:    lsr w9, w11, w10
 ; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #32
@@ -1281,22 +1115,18 @@ define <4 x i16> @lshr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -1319,42 +1149,34 @@ define <8 x i16> @lshr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -1385,82 +1207,66 @@ define void @lshr_v16i16(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #46]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -1488,10 +1294,8 @@ define <2 x i32> @lshr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -1514,18 +1318,14 @@ define <4 x i32> @lshr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -1556,34 +1356,26 @@ define void @lshr_v8i32(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsr w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -1705,8 +1497,6 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #20]
 ; NONEON-NOSVE-NEXT:    ldr w10, [sp, #12]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w9
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
@@ -1734,13 +1524,9 @@ define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #14]
 ; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x11 killed $w11
-; NONEON-NOSVE-NEXT:    // kill: def $x10 killed $w10
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #18]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
 ; NONEON-NOSVE-NEXT:    lsl w11, w12, w11
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    strh w11, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #12]
 ; NONEON-NOSVE-NEXT:    lsl w10, w11, w10
@@ -1772,42 +1558,34 @@ define <8 x i8> @shl_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #29]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #27]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #25]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -1830,82 +1608,66 @@ define <16 x i8> @shl_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -1936,162 +1698,130 @@ define void @shl_v32i8(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #63]
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #47]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #46]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #95]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #62]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #45]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #61]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #93]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #43]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #59]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #91]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #41]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #57]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #89]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #39]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #55]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #87]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #37]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #53]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #85]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #35]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #51]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #83]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #33]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #49]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #81]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #31]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #79]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #13]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #29]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #77]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #27]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #75]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #25]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #73]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #7]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #23]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #71]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #21]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #69]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #3]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #19]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #67]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrb w9, [sp]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #65]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -2119,22 +1849,18 @@ define <4 x i16> @shl_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #28]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #26]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -2157,42 +1883,34 @@ define <8 x i16> @shl_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #46]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #42]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #38]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #34]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -2223,82 +1941,66 @@ define void @shl_v16i16(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #62]
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #46]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #44]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #42]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #92]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #58]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #40]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #38]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #54]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #36]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #34]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #84]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #50]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #32]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #10]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #6]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #4]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldrh w9, [sp]
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
@@ -2326,10 +2028,8 @@ define <2 x i32> @shl_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
@@ -2352,18 +2052,14 @@ define <4 x i32> @shl_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
@@ -2394,34 +2090,26 @@ define void @shl_v8i32(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #88]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp, #8]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #80]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    ldp w9, w10, [sp]
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w11, w10, w8
 ; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    lsl w8, w9, w8
 ; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index c5da8049736b5..d5bbbfa9899da 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -280,7 +280,6 @@ define void @select_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    ldp q2, q1, [x0]
 ; CHECK-NEXT:    mov w8, #16 // =0x10
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    cmpne p1.b, p0/z, z1.b, z0.b
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, z3.b
 ; CHECK-NEXT:    st1b { z0.b }, p1, [x0, x8]
@@ -977,12 +976,10 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
 ; NONEON-NOSVE-NEXT:    str d2, [sp, #40]
 ; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #40]
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [sp]
-; NONEON-NOSVE-NEXT:    // kill: def $x9 killed $w9
-; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #1
 ; NONEON-NOSVE-NEXT:    ldr x10, [sp, #24]
 ; NONEON-NOSVE-NEXT:    ldr x11, [sp, #8]
+; NONEON-NOSVE-NEXT:    sbfx x8, x8, #0, #1
 ; NONEON-NOSVE-NEXT:    sbfx x9, x9, #0, #1
 ; NONEON-NOSVE-NEXT:    cmp x8, #0
 ; NONEON-NOSVE-NEXT:    csel x8, x11, x10, ne
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
index fde5debe8eda4..2a15b042c1512 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
@@ -99,7 +99,6 @@ define <2 x half> @load_v2f16(ptr %a) {
 ; CHECK-LABEL: load_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr s0, [x0]
-; CHECK-NEXT:    // kill: def $d0 killed $s0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_v2f16:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
index 4d8d76416b856..45f2dbc98b953 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -36,7 +36,6 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ; CHECK-NEXT:    mov w8, #1 // =0x1
 ; CHECK-NEXT:    index z2.d, #0, #1
 ; CHECK-NEXT:    mov z1.d, z1.d[1]
-; CHECK-NEXT:    // kill: def $x8 killed $w8
 ; CHECK-NEXT:    mov z3.d, x8
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
@@ -53,7 +52,6 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ; NONEON-NOSVE-NEXT:    ldr q0, [x0]
 ; NONEON-NOSVE-NEXT:    ldr q1, [x1]
 ; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    str q0, [sp, #112]
 ; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #112]
 ; NONEON-NOSVE-NEXT:    cmp x9, #0
@@ -138,7 +136,6 @@ define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ; NONEON-NOSVE-NEXT:    ldr q1, [x0]
 ; NONEON-NOSVE-NEXT:    ldr q0, [x1]
 ; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
-; NONEON-NOSVE-NEXT:    // kill: def $x8 killed $w8
 ; NONEON-NOSVE-NEXT:    str q1, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #64]
 ; NONEON-NOSVE-NEXT:    cmp x9, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
index 41a9b4b292d0b..4e72615a9446b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
@@ -768,8 +768,6 @@ define void @fadd_v2f16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    ldr s0, [x0]
 ; CHECK-NEXT:    ldr s1, [x1]
-; CHECK-NEXT:    // kill: def $d0 killed $s0
-; CHECK-NEXT:    // kill: def $d1 killed $s1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
index 1fe81b8697122..dc1781663a851 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
@@ -650,10 +650,11 @@ define void @shuffle_ext_byone_v4f64(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
 ; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldr d1, [sp]
-; NONEON-NOSVE-NEXT:    ldp d3, d2, [sp, #48]
+; NONEON-NOSVE-NEXT:    ldp d0, d2, [sp, #48]
+; NONEON-NOSVE-NEXT:    str d0, [sp, #72]
 ; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
 ; NONEON-NOSVE-NEXT:    stp d2, d1, [sp, #16]
-; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #64]
+; NONEON-NOSVE-NEXT:    str d0, [sp, #64]
 ; NONEON-NOSVE-NEXT:    ldr q1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
index 6e89e66e0e3b4..a856b7690337b 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
@@ -174,8 +174,6 @@ define <vscale x 16 x i1> @whilehs_b_ii() {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, #2 // =0x2
 ; CHECK-NEXT:    mov w9, #8 // =0x8
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-NEXT:    whilehs p0.b, x9, x8
 ; CHECK-NEXT:    ret
 entry:
@@ -354,8 +352,6 @@ define <vscale x 16 x i1> @whilehi_b_ii() {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, #2 // =0x2
 ; CHECK-NEXT:    mov w9, #8 // =0x8
-; CHECK-NEXT:    // kill: def $x8 killed $w8
-; CHECK-NEXT:    // kill: def $x9 killed $w9
 ; CHECK-NEXT:    whilehi p0.b, x9, x8
 ; CHECK-NEXT:    ret
 entry:



More information about the llvm-branch-commits mailing list