[llvm] 61510b5 - Revert "[AArch64] Enable subreg liveness tracking by default."

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 12 09:22:39 PST 2024


Author: Sander de Smalen
Date: 2024-12-12T17:22:15Z
New Revision: 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9

URL: https://github.com/llvm/llvm-project/commit/61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9
DIFF: https://github.com/llvm/llvm-project/commit/61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9.diff

LOG: Revert "[AArch64] Enable subreg liveness tracking by default."

This reverts commit 9c319d5bb40785c969d2af76535ca62448dfafa7.

Some issues were discovered with the bootstrap builds that seem to have been
caused by this commit. I'm reverting while I investigate.
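
Note that the feature itself is not removed: the hidden option in
AArch64Subtarget.cpp is only flipped back to off by default, so subreg liveness
tracking can still be enabled explicitly when running tests. A minimal sketch of
such an invocation, with a hypothetical input file name:

    llc -mtriple=aarch64 -aarch64-enable-subreg-liveness-tracking=true input.ll -o -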

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64Subtarget.cpp
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
    llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
    llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
    llvm/test/CodeGen/AArch64/GlobalISel/insert-vector-elt-pr63826.ll
    llvm/test/CodeGen/AArch64/aarch64-addv.ll
    llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
    llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
    llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
    llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
    llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
    llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll
    llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
    llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
    llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
    llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
    llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
    llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
    llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
    llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
    llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
    llvm/test/CodeGen/AArch64/aarch64-mops.ll
    llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
    llvm/test/CodeGen/AArch64/aarch64-mulv.ll
    llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
    llvm/test/CodeGen/AArch64/aarch64-scalarize-vec-load-ext.ll
    llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
    llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
    llvm/test/CodeGen/AArch64/abs.ll
    llvm/test/CodeGen/AArch64/active_lane_mask.ll
    llvm/test/CodeGen/AArch64/adc.ll
    llvm/test/CodeGen/AArch64/add-extract.ll
    llvm/test/CodeGen/AArch64/add.ll
    llvm/test/CodeGen/AArch64/addimm-mulimm.ll
    llvm/test/CodeGen/AArch64/addp-shuffle.ll
    llvm/test/CodeGen/AArch64/addsub_ext.ll
    llvm/test/CodeGen/AArch64/and-mask-removal.ll
    llvm/test/CodeGen/AArch64/andorxor.ll
    llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
    llvm/test/CodeGen/AArch64/arm64-addp.ll
    llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
    llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
    llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
    llvm/test/CodeGen/AArch64/arm64-build-vector.ll
    llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
    llvm/test/CodeGen/AArch64/arm64-dup.ll
    llvm/test/CodeGen/AArch64/arm64-ext.ll
    llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
    llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll
    llvm/test/CodeGen/AArch64/arm64-fcopysign.ll
    llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
    llvm/test/CodeGen/AArch64/arm64-fmax.ll
    llvm/test/CodeGen/AArch64/arm64-fp128.ll
    llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
    llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
    llvm/test/CodeGen/AArch64/arm64-ld1.ll
    llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
    llvm/test/CodeGen/AArch64/arm64-mul.ll
    llvm/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
    llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
    llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
    llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
    llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
    llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
    llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
    llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
    llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
    llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
    llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
    llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
    llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
    llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
    llvm/test/CodeGen/AArch64/arm64-nvcast.ll
    llvm/test/CodeGen/AArch64/arm64-popcnt.ll
    llvm/test/CodeGen/AArch64/arm64-rev.ll
    llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
    llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
    llvm/test/CodeGen/AArch64/arm64-stp.ll
    llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
    llvm/test/CodeGen/AArch64/arm64-tbl.ll
    llvm/test/CodeGen/AArch64/arm64-vadd.ll
    llvm/test/CodeGen/AArch64/arm64-vaddv.ll
    llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
    llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
    llvm/test/CodeGen/AArch64/arm64-vmul.ll
    llvm/test/CodeGen/AArch64/arm64-zip.ll
    llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
    llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
    llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
    llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
    llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
    llvm/test/CodeGen/AArch64/atomic-ops.ll
    llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
    llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
    llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
    llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
    llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
    llvm/test/CodeGen/AArch64/bf16-instructions.ll
    llvm/test/CodeGen/AArch64/bf16-select.ll
    llvm/test/CodeGen/AArch64/bf16-shuffle.ll
    llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
    llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
    llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll
    llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
    llvm/test/CodeGen/AArch64/bitcast.ll
    llvm/test/CodeGen/AArch64/bitfield-insert.ll
    llvm/test/CodeGen/AArch64/bswap.ll
    llvm/test/CodeGen/AArch64/build-one-lane.ll
    llvm/test/CodeGen/AArch64/build-vector-extract.ll
    llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
    llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
    llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
    llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
    llvm/test/CodeGen/AArch64/combine-andintoload.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
    llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
    llvm/test/CodeGen/AArch64/concat-vector.ll
    llvm/test/CodeGen/AArch64/concatbinop.ll
    llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
    llvm/test/CodeGen/AArch64/dag-numsignbits.ll
    llvm/test/CodeGen/AArch64/dup.ll
    llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll
    llvm/test/CodeGen/AArch64/ext-narrow-index.ll
    llvm/test/CodeGen/AArch64/extbinopload.ll
    llvm/test/CodeGen/AArch64/extract-bits.ll
    llvm/test/CodeGen/AArch64/extract-insert.ll
    llvm/test/CodeGen/AArch64/extract-lowbits.ll
    llvm/test/CodeGen/AArch64/extract-sext-zext.ll
    llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
    llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
    llvm/test/CodeGen/AArch64/extract-vector-elt.ll
    llvm/test/CodeGen/AArch64/f16-instructions.ll
    llvm/test/CodeGen/AArch64/fabs-fp128.ll
    llvm/test/CodeGen/AArch64/fabs.ll
    llvm/test/CodeGen/AArch64/faddp-half.ll
    llvm/test/CodeGen/AArch64/faddp.ll
    llvm/test/CodeGen/AArch64/faddsub.ll
    llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
    llvm/test/CodeGen/AArch64/fast-isel-gep.ll
    llvm/test/CodeGen/AArch64/fast-isel-shift.ll
    llvm/test/CodeGen/AArch64/fcmp.ll
    llvm/test/CodeGen/AArch64/fcopysign-noneon.ll
    llvm/test/CodeGen/AArch64/fcopysign.ll
    llvm/test/CodeGen/AArch64/fcvt.ll
    llvm/test/CodeGen/AArch64/fcvt_combine.ll
    llvm/test/CodeGen/AArch64/fdiv-combine.ll
    llvm/test/CodeGen/AArch64/fdiv.ll
    llvm/test/CodeGen/AArch64/fexplog.ll
    llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll
    llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
    llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
    llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
    llvm/test/CodeGen/AArch64/fminimummaximum.ll
    llvm/test/CodeGen/AArch64/fminmax.ll
    llvm/test/CodeGen/AArch64/fmla.ll
    llvm/test/CodeGen/AArch64/fmul.ll
    llvm/test/CodeGen/AArch64/fneg.ll
    llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
    llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
    llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
    llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
    llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
    llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
    llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
    llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
    llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
    llvm/test/CodeGen/AArch64/fpext.ll
    llvm/test/CodeGen/AArch64/fpmode.ll
    llvm/test/CodeGen/AArch64/fpow.ll
    llvm/test/CodeGen/AArch64/fpowi.ll
    llvm/test/CodeGen/AArch64/fptoi.ll
    llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
    llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
    llvm/test/CodeGen/AArch64/fptrunc.ll
    llvm/test/CodeGen/AArch64/freeze.ll
    llvm/test/CodeGen/AArch64/frem-power2.ll
    llvm/test/CodeGen/AArch64/frem.ll
    llvm/test/CodeGen/AArch64/fsincos.ll
    llvm/test/CodeGen/AArch64/fsqrt.ll
    llvm/test/CodeGen/AArch64/funnel-shift.ll
    llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
    llvm/test/CodeGen/AArch64/get_vector_length.ll
    llvm/test/CodeGen/AArch64/half.ll
    llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
    llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
    llvm/test/CodeGen/AArch64/icmp.ll
    llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
    llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
    llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
    llvm/test/CodeGen/AArch64/insert-extend.ll
    llvm/test/CodeGen/AArch64/insert-subvector.ll
    llvm/test/CodeGen/AArch64/insertextract.ll
    llvm/test/CodeGen/AArch64/insertshuffleload.ll
    llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
    llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
    llvm/test/CodeGen/AArch64/itofp-bf16.ll
    llvm/test/CodeGen/AArch64/itofp.ll
    llvm/test/CodeGen/AArch64/ldexp.ll
    llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
    llvm/test/CodeGen/AArch64/llrint-conv.ll
    llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
    llvm/test/CodeGen/AArch64/llvm.exp10.ll
    llvm/test/CodeGen/AArch64/llvm.frexp.ll
    llvm/test/CodeGen/AArch64/llvm.sincos.ll
    llvm/test/CodeGen/AArch64/load.ll
    llvm/test/CodeGen/AArch64/logic-shift.ll
    llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
    llvm/test/CodeGen/AArch64/lrint-conv.ll
    llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
    llvm/test/CodeGen/AArch64/lslfast.ll
    llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
    llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
    llvm/test/CodeGen/AArch64/memset-inline.ll
    llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll
    llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
    llvm/test/CodeGen/AArch64/mla_mls_merge.ll
    llvm/test/CodeGen/AArch64/mul.ll
    llvm/test/CodeGen/AArch64/neon-bitcast.ll
    llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
    llvm/test/CodeGen/AArch64/neon-dot-product.ll
    llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
    llvm/test/CodeGen/AArch64/neon-extract.ll
    llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
    llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
    llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
    llvm/test/CodeGen/AArch64/neon-luti.ll
    llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
    llvm/test/CodeGen/AArch64/neon-perm.ll
    llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
    llvm/test/CodeGen/AArch64/neon-rshrn.ll
    llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
    llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
    llvm/test/CodeGen/AArch64/neon-shuffle-vector-tbl.ll
    llvm/test/CodeGen/AArch64/neon-truncstore.ll
    llvm/test/CodeGen/AArch64/neon-vcmla.ll
    llvm/test/CodeGen/AArch64/neon-wide-splat.ll
    llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
    llvm/test/CodeGen/AArch64/nontemporal-load.ll
    llvm/test/CodeGen/AArch64/nontemporal.ll
    llvm/test/CodeGen/AArch64/phi.ll
    llvm/test/CodeGen/AArch64/popcount.ll
    llvm/test/CodeGen/AArch64/pow.ll
    llvm/test/CodeGen/AArch64/pr-cf624b2.ll
    llvm/test/CodeGen/AArch64/pr58350.ll
    llvm/test/CodeGen/AArch64/pr58431.ll
    llvm/test/CodeGen/AArch64/pr61111.ll
    llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
    llvm/test/CodeGen/AArch64/ptradd.ll
    llvm/test/CodeGen/AArch64/qmovn.ll
    llvm/test/CodeGen/AArch64/rcpc3.ll
    llvm/test/CodeGen/AArch64/reduce-and.ll
    llvm/test/CodeGen/AArch64/reduce-or.ll
    llvm/test/CodeGen/AArch64/reduce-shuffle.ll
    llvm/test/CodeGen/AArch64/reduce-xor.ll
    llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
    llvm/test/CodeGen/AArch64/rem.ll
    llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll
    llvm/test/CodeGen/AArch64/select-constant-xor.ll
    llvm/test/CodeGen/AArch64/seqpairspill.mir
    llvm/test/CodeGen/AArch64/setcc_knownbits.ll
    llvm/test/CodeGen/AArch64/sext.ll
    llvm/test/CodeGen/AArch64/shift-amount-mod.ll
    llvm/test/CodeGen/AArch64/shift-by-signext.ll
    llvm/test/CodeGen/AArch64/shift-mod.ll
    llvm/test/CodeGen/AArch64/shift.ll
    llvm/test/CodeGen/AArch64/shift_minsize.ll
    llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
    llvm/test/CodeGen/AArch64/shuffles.ll
    llvm/test/CodeGen/AArch64/shufflevector.ll
    llvm/test/CodeGen/AArch64/sink-and-fold.ll
    llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
    llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
    llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
    llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
    llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
    llvm/test/CodeGen/AArch64/sme-streaming-body.ll
    llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
    llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
    llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
    llvm/test/CodeGen/AArch64/smul_fix_sat.ll
    llvm/test/CodeGen/AArch64/spill-fold.mir
    llvm/test/CodeGen/AArch64/split-vector-insert.ll
    llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
    llvm/test/CodeGen/AArch64/store.ll
    llvm/test/CodeGen/AArch64/sub.ll
    llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll
    llvm/test/CodeGen/AArch64/sve-doublereduct.ll
    llvm/test/CodeGen/AArch64/sve-extract-element.ll
    llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
    llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
    llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
    llvm/test/CodeGen/AArch64/sve-fadda-select.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll
    llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
    llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
    llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
    llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
    llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll
    llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll
    llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
    llvm/test/CodeGen/AArch64/sve-insert-element.ll
    llvm/test/CodeGen/AArch64/sve-insert-vector.ll
    llvm/test/CodeGen/AArch64/sve-int-reduce.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
    llvm/test/CodeGen/AArch64/sve-merging-stores.ll
    llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
    llvm/test/CodeGen/AArch64/sve-pr62151.ll
    llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
    llvm/test/CodeGen/AArch64/sve-select.ll
    llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
    llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
    llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
    llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
    llvm/test/CodeGen/AArch64/sve-vector-compress.ll
    llvm/test/CodeGen/AArch64/sve-vector-splat.ll
    llvm/test/CodeGen/AArch64/sve-vl-arith.ll
    llvm/test/CodeGen/AArch64/sve2-histcnt.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
    llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
    llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
    llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
    llvm/test/CodeGen/AArch64/tbl-loops.ll
    llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
    llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
    llvm/test/CodeGen/AArch64/umul_fix_sat.ll
    llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
    llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
    llvm/test/CodeGen/AArch64/vec-libcalls.ll
    llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-add.ll
    llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-bool.ll
    llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
    llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
    llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
    llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
    llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
    llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
    llvm/test/CodeGen/AArch64/vector-compress.ll
    llvm/test/CodeGen/AArch64/vector-fcopysign.ll
    llvm/test/CodeGen/AArch64/vector-fcvt.ll
    llvm/test/CodeGen/AArch64/vector-llrint.ll
    llvm/test/CodeGen/AArch64/vector-lrint.ll
    llvm/test/CodeGen/AArch64/vldn_shuffle.ll
    llvm/test/CodeGen/AArch64/win64-fpowi.ll
    llvm/test/CodeGen/AArch64/win64_vararg.ll
    llvm/test/CodeGen/AArch64/xtn.ll
    llvm/test/CodeGen/AArch64/zext.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index cd4cd0fdd3747a..3767b34bd5b0c5 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -90,7 +90,7 @@ static cl::alias AArch64StreamingStackHazardSize(
 // are ironed out. This option allows the feature to be used in tests.
 static cl::opt<bool>
     EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking",
-                                 cl::init(true), cl::Hidden,
+                                 cl::init(false), cl::Hidden,
                                  cl::desc("Enable subreg liveness tracking"));
 
 static cl::opt<bool>

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
index 444f579f232420..a1712a5ec7a27c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
@@ -2273,10 +2273,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    casp x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    casp x4, x5, x10, x11, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2298,10 +2298,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspa x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspa x4, x5, x10, x11, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2323,10 +2323,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspl x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspl x4, x5, x10, x11, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2348,10 +2348,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspal x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspal x4, x5, x10, x11, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2373,10 +2373,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspal x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspal x4, x5, x10, x11, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -3406,7 +3406,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3427,7 +3427,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3448,7 +3448,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3469,7 +3469,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3490,7 +3490,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3947,7 +3947,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lt
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -3975,7 +3975,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lt
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4003,7 +4003,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lt
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4031,7 +4031,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lt
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4059,7 +4059,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lt
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4592,7 +4592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, ge
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4620,7 +4620,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, ge
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4648,7 +4648,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, ge
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4676,7 +4676,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, ge
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4704,7 +4704,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, ge
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5237,7 +5237,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lo
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5265,7 +5265,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lo
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5293,7 +5293,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lo
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5321,7 +5321,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lo
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5349,7 +5349,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lo
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5877,7 +5877,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, hs
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5905,7 +5905,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, hs
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5933,7 +5933,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, hs
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5961,7 +5961,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, hs
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5989,7 +5989,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, hs
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
index 62af028defde56..ee5fbe39b4492c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -1616,7 +1616,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
+; -O1:    and x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -1637,7 +1637,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
+; -O1:    and x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -1658,7 +1658,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
+; -O1:    and x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -1679,7 +1679,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
+; -O1:    and x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -1700,7 +1700,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
+; -O1:    and x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -2343,10 +2343,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    casp x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    casp x4, x5, x10, x11, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2368,10 +2368,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspa x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspa x4, x5, x10, x11, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2393,10 +2393,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspl x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspl x4, x5, x10, x11, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2418,10 +2418,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspal x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspal x4, x5, x10, x11, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2443,10 +2443,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspal x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspal x4, x5, x10, x11, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -2996,7 +2996,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x5, x3
+; -O1:    orr x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3017,7 +3017,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x5, x3
+; -O1:    orr x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3038,7 +3038,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x5, x3
+; -O1:    orr x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3059,7 +3059,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x5, x3
+; -O1:    orr x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3080,7 +3080,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x5, x3
+; -O1:    orr x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3531,7 +3531,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3552,7 +3552,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3573,7 +3573,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3594,7 +3594,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -3615,7 +3615,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
 ; -O1:    ccmp x4, x6, #0, eq
@@ -4072,7 +4072,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lt
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4100,7 +4100,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lt
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4128,7 +4128,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lt
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4156,7 +4156,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lt
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4184,7 +4184,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lt
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4717,7 +4717,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, ge
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4745,7 +4745,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, ge
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4773,7 +4773,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, ge
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4801,7 +4801,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, ge
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -4829,7 +4829,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, ge
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5362,7 +5362,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lo
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5390,7 +5390,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lo
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5418,7 +5418,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lo
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5446,7 +5446,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lo
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -5474,7 +5474,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, lo
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -6002,7 +6002,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, hs
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -6030,7 +6030,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, hs
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -6058,7 +6058,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, hs
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -6086,7 +6086,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, hs
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7
@@ -6114,7 +6114,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    cmp x2, x4
-; -O1:    csel x9, x5, x3, hs
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x5, x7

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
index f043f99327308b..83e383f335637c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
@@ -517,7 +517,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x5, x3
+; -O1:    adds x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -534,7 +534,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x5, x3
+; -O1:    adds x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -551,7 +551,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x5, x3
+; -O1:    adds x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -568,7 +568,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x5, x3
+; -O1:    adds x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -585,7 +585,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x5, x3
+; -O1:    adds x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1102,7 +1102,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x5, x3
+; -O1:    subs x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1119,7 +1119,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x5, x3
+; -O1:    subs x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1136,7 +1136,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x5, x3
+; -O1:    subs x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1153,7 +1153,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x5, x3
+; -O1:    subs x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1170,7 +1170,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x5, x3
+; -O1:    subs x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -2356,10 +2356,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    casp x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    casp x4, x5, x10, x11, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2379,10 +2379,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspa x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspa x4, x5, x10, x11, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2402,10 +2402,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspl x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspl x4, x5, x10, x11, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2425,10 +2425,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspal x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspal x4, x5, x10, x11, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2448,10 +2448,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspal x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspal x4, x5, x10, x11, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -3479,7 +3479,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3498,7 +3498,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3517,7 +3517,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3536,7 +3536,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3555,7 +3555,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -4004,8 +4004,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lt
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4025,8 +4025,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lt
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4046,8 +4046,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lt
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4067,8 +4067,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lt
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4088,8 +4088,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lt
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4589,8 +4589,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, ge
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4610,8 +4610,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, ge
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4631,8 +4631,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, ge
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4652,8 +4652,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, ge
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4673,8 +4673,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, ge
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5174,8 +5174,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lo
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5195,8 +5195,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lo
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5216,8 +5216,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lo
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5237,8 +5237,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lo
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5258,8 +5258,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lo
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5759,8 +5759,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, hs
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5780,8 +5780,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, hs
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5801,8 +5801,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, hs
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5822,8 +5822,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, hs
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5843,8 +5843,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, hs
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
index df7b57e7e18f46..0c3ed9b0f1de0f 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
@@ -542,7 +542,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x5, x3
+; -O1:    adds x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -559,7 +559,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x5, x3
+; -O1:    adds x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -576,7 +576,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x5, x3
+; -O1:    adds x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -593,7 +593,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x5, x3
+; -O1:    adds x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -610,7 +610,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    adds x9, x5, x3
+; -O1:    adds x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1127,7 +1127,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x5, x3
+; -O1:    subs x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1144,7 +1144,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x5, x3
+; -O1:    subs x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1161,7 +1161,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x5, x3
+; -O1:    subs x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1178,7 +1178,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x5, x3
+; -O1:    subs x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1195,7 +1195,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    subs x9, x5, x3
+; -O1:    subs x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1674,7 +1674,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
+; -O1:    and x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1693,7 +1693,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
+; -O1:    and x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1712,7 +1712,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
+; -O1:    and x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1731,7 +1731,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
+; -O1:    and x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -1750,7 +1750,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
+; -O1:    and x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -2406,10 +2406,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    casp x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    casp x4, x5, x10, x11, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
@@ -2429,10 +2429,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspa x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspa x4, x5, x10, x11, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
@@ -2452,10 +2452,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspl x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspl x4, x5, x10, x11, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
@@ -2475,10 +2475,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspal x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspal x4, x5, x10, x11, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
@@ -2498,10 +2498,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    and x8, x4, x2
-; -O1:    and x9, x5, x3
-; -O1:    mvn x8, x8
-; -O1:    mvn x9, x9
-; -O1:    caspal x4, x5, x8, x9, [x0]
+; -O1:    and x9, x7, x3
+; -O1:    mvn x10, x8
+; -O1:    mvn x11, x9
+; -O1:    caspal x4, x5, x10, x11, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
     %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
@@ -3049,7 +3049,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x5, x3
+; -O1:    orr x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3068,7 +3068,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x5, x3
+; -O1:    orr x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3087,7 +3087,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x5, x3
+; -O1:    orr x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3106,7 +3106,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x5, x3
+; -O1:    orr x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3125,7 +3125,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    orr x8, x4, x2
-; -O1:    orr x9, x5, x3
+; -O1:    orr x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3574,7 +3574,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3593,7 +3593,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3612,7 +3612,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3631,7 +3631,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -3650,7 +3650,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
 ; -O1:    eor x8, x4, x2
-; -O1:    eor x9, x5, x3
+; -O1:    eor x9, x7, x3
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
 ; -O1:    ccmp x5, x7, #0, eq
@@ -4099,8 +4099,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lt
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4120,8 +4120,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lt
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4141,8 +4141,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lt
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4162,8 +4162,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lt
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4183,8 +4183,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lt
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lt
 ; -O1:    csel x8, x4, x2, lt
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4684,8 +4684,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, ge
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4705,8 +4705,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, ge
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4726,8 +4726,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, ge
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4747,8 +4747,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, ge
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -4768,8 +4768,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value)
 ;
 ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, ge
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, ge
 ; -O1:    csel x8, x4, x2, ge
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5269,8 +5269,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lo
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5290,8 +5290,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lo
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5311,8 +5311,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lo
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5332,8 +5332,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lo
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5353,8 +5353,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, lo
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, lo
 ; -O1:    csel x8, x4, x2, lo
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5854,8 +5854,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, hs
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    casp x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5875,8 +5875,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, hs
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspa x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5896,8 +5896,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, hs
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspl x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5917,8 +5917,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, hs
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6
@@ -5938,8 +5938,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
-; -O1:    cmp x3, x5
-; -O1:    csel x9, x5, x3, hs
+; -O1:    cmp x3, x7
+; -O1:    csel x9, x7, x3, hs
 ; -O1:    csel x8, x4, x2, hs
 ; -O1:    caspal x4, x5, x8, x9, [x0]
 ; -O1:    cmp x4, x6

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
index 80310a11add697..1fe63c9be8c629 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
@@ -53,6 +53,10 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap:
 ; CHECK-CAS-O1:       // %bb.0:
+; CHECK-CAS-O1-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; CHECK-CAS-O1-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
 ; CHECK-CAS-O1-NEXT:    caspa x2, x3, x4, x5, [x0]
 ; CHECK-CAS-O1-NEXT:    mov v0.d[0], x2
 ; CHECK-CAS-O1-NEXT:    mov v0.d[1], x3
@@ -176,6 +180,10 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %n
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic_seqcst:
 ; CHECK-CAS-O1:       // %bb.0:
+; CHECK-CAS-O1-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; CHECK-CAS-O1-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
 ; CHECK-CAS-O1-NEXT:    caspal x2, x3, x4, x5, [x0]
 ; CHECK-CAS-O1-NEXT:    mov v0.d[0], x2
 ; CHECK-CAS-O1-NEXT:    mov v0.d[1], x3
@@ -299,6 +307,10 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %ne
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap_release_acquire:
 ; CHECK-CAS-O1:       // %bb.0:
+; CHECK-CAS-O1-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; CHECK-CAS-O1-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
 ; CHECK-CAS-O1-NEXT:    caspal x2, x3, x4, x5, [x0]
 ; CHECK-CAS-O1-NEXT:    mov v0.d[0], x2
 ; CHECK-CAS-O1-NEXT:    mov v0.d[1], x3
@@ -422,6 +434,10 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval)
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic:
 ; CHECK-CAS-O1:       // %bb.0:
+; CHECK-CAS-O1-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; CHECK-CAS-O1-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
 ; CHECK-CAS-O1-NEXT:    caspal x2, x3, x4, x5, [x0]
 ; CHECK-CAS-O1-NEXT:    mov v0.d[0], x2
 ; CHECK-CAS-O1-NEXT:    mov v0.d[1], x3
@@ -642,6 +658,10 @@ define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) {
 ;
 ; CHECK-CAS-O1-LABEL: val_compare_and_swap_return:
 ; CHECK-CAS-O1:       // %bb.0:
+; CHECK-CAS-O1-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; CHECK-CAS-O1-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-CAS-O1-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
 ; CHECK-CAS-O1-NEXT:    caspa x2, x3, x4, x5, [x0]
 ; CHECK-CAS-O1-NEXT:    mov x0, x2
 ; CHECK-CAS-O1-NEXT:    mov x1, x3

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 842dd353bc6b22..de3f323891a36a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -1938,6 +1938,7 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
 define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
 ; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i8:
 ; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NOLSE-O1-NEXT:  LBB28_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrb w8, [x0]
@@ -2992,6 +2993,7 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
 define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
 ; CHECK-NOLSE-O1-LABEL: atomicrmw_xchg_i16:
 ; CHECK-NOLSE-O1:       ; %bb.0:
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NOLSE-O1-NEXT:  LBB38_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrh w8, [x0]
@@ -4052,6 +4054,7 @@ define i32 @atomicrmw_xchg_i32(ptr %ptr, i32 %rhs) {
 ; CHECK-NOLSE-O1-NEXT:    stxr w9, w1, [x8]
 ; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB48_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.2: ; %atomicrmw.end
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-OUTLINE-O1-LABEL: atomicrmw_xchg_i32:
@@ -5993,6 +5996,7 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-NOLSE-O1-LABEL: cmpxchg_i8:
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:    mov x8, x0
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-NOLSE-O1-NEXT:  LBB67_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrb w0, [x8]
@@ -6005,10 +6009,12 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
 ; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB67_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
 ; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB67_4: ; %cmpxchg.nostore
 ; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-OUTLINE-O1-LABEL: cmpxchg_i8:
@@ -6097,6 +6103,7 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-NOLSE-O1-LABEL: cmpxchg_i16:
 ; CHECK-NOLSE-O1:       ; %bb.0:
 ; CHECK-NOLSE-O1-NEXT:    mov x8, x0
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-NOLSE-O1-NEXT:  LBB68_1: ; %cmpxchg.start
 ; CHECK-NOLSE-O1-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT:    ldxrh w0, [x8]
@@ -6109,10 +6116,12 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
 ; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB68_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
 ; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB68_4: ; %cmpxchg.nostore
 ; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-OUTLINE-O1-LABEL: cmpxchg_i16:
@@ -6212,10 +6221,12 @@ define { i32, i1 } @cmpxchg_i32(ptr %ptr, i32 %desired, i32 %new) {
 ; CHECK-NOLSE-O1-NEXT:    cbnz w9, LBB69_1
 ; CHECK-NOLSE-O1-NEXT:  ; %bb.3:
 ; CHECK-NOLSE-O1-NEXT:    mov w1, #1 ; =0x1
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ; CHECK-NOLSE-O1-NEXT:  LBB69_4: ; %cmpxchg.nostore
 ; CHECK-NOLSE-O1-NEXT:    mov w1, wzr
 ; CHECK-NOLSE-O1-NEXT:    clrex
+; CHECK-NOLSE-O1-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT:    ret
 ;
 ; CHECK-OUTLINE-O1-LABEL: cmpxchg_i32:
@@ -6550,36 +6561,25 @@ define internal float @bitcast_to_float(ptr %ptr) {
 }
 
 define internal half @bitcast_to_half(ptr %ptr) {
-; CHECK-NOLSE-O1-LABEL: bitcast_to_half:
-; CHECK-NOLSE-O1:       ; %bb.0:
-; CHECK-NOLSE-O1-NEXT:    ldarh w8, [x0]
-; CHECK-NOLSE-O1-NEXT:    fmov s0, w8
-; CHECK-NOLSE-O1-NEXT:    ret
-;
-; CHECK-OUTLINE-O1-LABEL: bitcast_to_half:
-; CHECK-OUTLINE-O1:       ; %bb.0:
-; CHECK-OUTLINE-O1-NEXT:    ldarh w8, [x0]
-; CHECK-OUTLINE-O1-NEXT:    fmov s0, w8
-; CHECK-OUTLINE-O1-NEXT:    ret
-;
-; CHECK-NOLSE-O0-LABEL: bitcast_to_half:
-; CHECK-NOLSE-O0:       ; %bb.0:
-; CHECK-NOLSE-O0-NEXT:    ldarh w8, [x0]
-; CHECK-NOLSE-O0-NEXT:    fmov s0, w8
-; CHECK-NOLSE-O0-NEXT:    ; kill: def $h0 killed $h0 killed $s0
-; CHECK-NOLSE-O0-NEXT:    ret
+; CHECK-NOLSE-LABEL: bitcast_to_half:
+; CHECK-NOLSE:       ; %bb.0:
+; CHECK-NOLSE-NEXT:    ldarh w8, [x0]
+; CHECK-NOLSE-NEXT:    fmov s0, w8
+; CHECK-NOLSE-NEXT:    ; kill: def $h0 killed $h0 killed $s0
+; CHECK-NOLSE-NEXT:    ret
 ;
-; CHECK-OUTLINE-O0-LABEL: bitcast_to_half:
-; CHECK-OUTLINE-O0:       ; %bb.0:
-; CHECK-OUTLINE-O0-NEXT:    ldarh w8, [x0]
-; CHECK-OUTLINE-O0-NEXT:    fmov s0, w8
-; CHECK-OUTLINE-O0-NEXT:    ; kill: def $h0 killed $h0 killed $s0
-; CHECK-OUTLINE-O0-NEXT:    ret
+; CHECK-OUTLINE-LABEL: bitcast_to_half:
+; CHECK-OUTLINE:       ; %bb.0:
+; CHECK-OUTLINE-NEXT:    ldarh w8, [x0]
+; CHECK-OUTLINE-NEXT:    fmov s0, w8
+; CHECK-OUTLINE-NEXT:    ; kill: def $h0 killed $h0 killed $s0
+; CHECK-OUTLINE-NEXT:    ret
 ;
 ; CHECK-LSE-O1-LABEL: bitcast_to_half:
 ; CHECK-LSE-O1:       ; %bb.0:
 ; CHECK-LSE-O1-NEXT:    ldarh w8, [x0]
 ; CHECK-LSE-O1-NEXT:    fmov s0, w8
+; CHECK-LSE-O1-NEXT:    ; kill: def $h0 killed $h0 killed $s0
 ; CHECK-LSE-O1-NEXT:    ret
 ;
 ; CHECK-LSE-O0-LABEL: bitcast_to_half:

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index 7efc31f0cf2597..c6819ff39ed33e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -12,13 +12,13 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRW renamable $w2, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
@@ -26,14 +26,14 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.cmpxchg.nostore:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CLREX 15, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.cmpxchg.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire, !pcsections !0
   %val = extractvalue { i32, i1 } %pair, 0
@@ -46,19 +46,19 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w1, $x0, $x2
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w9 = LDRWui killed renamable $x2, 0, pcsections !0 :: (load (s32) from %ir.pnew)
+  ; CHECK-NEXT:   renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
-  ; CHECK-NEXT:   liveins: $w1, $x0, $x9:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $w1, $x0, $x9
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $x0, $x8:0x0000000000000040, $x9:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $w1, $x0, $x8, $x9
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w10 = STXRW renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1
@@ -66,14 +66,14 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.cmpxchg.nostore:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CLREX 15, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.cmpxchg.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %new = load i32, ptr %pnew, !pcsections !0
   %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acquire acquire, !pcsections !0
@@ -91,13 +91,13 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $w1, $w2, $x0, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STLXRW renamable $w2, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
@@ -105,14 +105,14 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.cmpxchg.nostore:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CLREX 15, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.cmpxchg.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %pair = cmpxchg ptr %p, i32 %cmp, i32 %new acq_rel monotonic, !pcsections !0
   %val = extractvalue { i32, i1 } %pair, 0
@@ -243,16 +243,16 @@ define i32 @fetch_and_nand(ptr %p) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRW renamable $x0, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w8, 2, pcsections !0
   ; CHECK-NEXT:   $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %val = atomicrmw nand ptr %p, i32 7 release, !pcsections !0
   ret i32 %val
@@ -295,15 +295,15 @@ define i32 @fetch_and_or(ptr %p) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, pcsections !0 :: (volatile load (s32) from %ir.p)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
   ; CHECK-NEXT:   $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0
   ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %val = atomicrmw or ptr %p, i32 5 seq_cst, !pcsections !0
   ret i32 %val
@@ -726,15 +726,15 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw add ptr %ptr, i8 %rhs seq_cst, !pcsections !0
   ret i8 %res
@@ -746,18 +746,20 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $w1 = KILL $w1, implicit-def $x1
+  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK-NEXT:   liveins: $x0, $x1:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x0, $x1
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw xchg ptr %ptr, i8 %rhs monotonic, !pcsections !0
   ret i8 %res
@@ -773,15 +775,15 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw sub ptr %ptr, i8 %rhs acquire, !pcsections !0
   ret i8 %res
@@ -797,15 +799,15 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw and ptr %ptr, i8 %rhs release, !pcsections !0
   ret i8 %res
@@ -821,15 +823,15 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw or ptr %ptr, i8 %rhs seq_cst, !pcsections !0
   ret i8 %res
@@ -845,15 +847,15 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw xor ptr %ptr, i8 %rhs monotonic, !pcsections !0
   ret i8 %res
@@ -869,17 +871,17 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw min ptr %ptr, i8 %rhs acquire, !pcsections !0
   ret i8 %res
@@ -895,17 +897,17 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw max ptr %ptr, i8 %rhs release, !pcsections !0
   ret i8 %res
@@ -923,11 +925,11 @@ define i8 @atomicrmw_umin_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   renamable $w8 = ANDWri killed renamable $w8, 7
+  ; CHECK-NEXT:   renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRB killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -951,11 +953,11 @@ define i8 @atomicrmw_umax_i8(ptr %ptr, i8 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
-  ; CHECK-NEXT:   renamable $w8 = ANDWri killed renamable $w8, 7
+  ; CHECK-NEXT:   renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 7, implicit killed $x8
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w11 = STXRB killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w11 = STXRB renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -977,15 +979,15 @@ define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw add ptr %ptr, i16 %rhs seq_cst, !pcsections !0
   ret i16 %res
@@ -997,18 +999,20 @@ define i16 @atomicrmw_xchg_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $w1 = KILL $w1, implicit-def $x1
+  ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.atomicrmw.start:
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
-  ; CHECK-NEXT:   liveins: $x0, $x1:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x0, $x1
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRH renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw xchg ptr %ptr, i16 %rhs monotonic, !pcsections !0
   ret i16 %res
@@ -1024,15 +1028,15 @@ define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw sub ptr %ptr, i16 %rhs acquire, !pcsections !0
   ret i16 %res
@@ -1048,15 +1052,15 @@ define i16 @atomicrmw_and_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw and ptr %ptr, i16 %rhs release, !pcsections !0
   ret i16 %res
@@ -1072,15 +1076,15 @@ define i16 @atomicrmw_or_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw or ptr %ptr, i16 %rhs seq_cst, !pcsections !0
   ret i16 %res
@@ -1096,15 +1100,15 @@ define i16 @atomicrmw_xor_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw xor ptr %ptr, i16 %rhs monotonic, !pcsections !0
   ret i16 %res
@@ -1120,17 +1124,17 @@ define i16 @atomicrmw_min_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw min ptr %ptr, i16 %rhs acquire, !pcsections !0
   ret i16 %res
@@ -1146,17 +1150,17 @@ define i16 @atomicrmw_max_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w1, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = SBFMWri renamable $w8, 0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 40, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w10 = STLXRH renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w10, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
-  ; CHECK-NEXT:   liveins: $x8:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, killed $w8, 0
+  ; CHECK-NEXT:   $w0 = ORRWrs $wzr, $w8, 0, implicit killed $x8
   ; CHECK-NEXT:   RET undef $lr, implicit $w0
   %res = atomicrmw max ptr %ptr, i16 %rhs release, !pcsections !0
   ret i16 %res
@@ -1174,11 +1178,11 @@ define i16 @atomicrmw_umin_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   renamable $w8 = ANDWri killed renamable $w8, 15
+  ; CHECK-NEXT:   renamable $w8 = LDAXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRH killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 3, implicit killed $nzcv, implicit-def $x10, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w11 = STLXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -1202,11 +1206,11 @@ define i16 @atomicrmw_umax_i16(ptr %ptr, i16 %rhs) {
   ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.2(0x04000000)
   ; CHECK-NEXT:   liveins: $w9, $x0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
-  ; CHECK-NEXT:   renamable $w8 = ANDWri killed renamable $w8, 15
+  ; CHECK-NEXT:   renamable $w8 = LDXRH renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w8 = ANDWri renamable $w8, 15, implicit killed $x8
   ; CHECK-NEXT:   $wzr = SUBSWrs renamable $w8, renamable $w9, 0, implicit-def $nzcv, pcsections !0
-  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, pcsections !0
-  ; CHECK-NEXT:   early-clobber renamable $w11 = STXRH killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s16) into %ir.ptr)
+  ; CHECK-NEXT:   renamable $w10 = CSELWr renamable $w8, renamable $w9, 8, implicit killed $nzcv, implicit-def $x10, pcsections !0
+  ; CHECK-NEXT:   early-clobber renamable $w11 = STXRH renamable $w10, renamable $x0, implicit killed $x10, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w11, %bb.1, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.atomicrmw.end:
@@ -1225,34 +1229,37 @@ define { i8, i1 } @cmpxchg_i8(ptr %ptr, i8 %desired, i8 %new) {
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $x8 = ORRXrs $xzr, $x0, 0
+  ; CHECK-NEXT:   renamable $w2 = KILL $w2, implicit-def $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.4(0x04000000)
-  ; CHECK-NEXT:   liveins: $w1, $x2:0x0000000000000040, $x8
+  ; CHECK-NEXT:   liveins: $w1, $x2, $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w0 = LDXRB renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w0 = LDXRB renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s8) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w0, 7, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $x0:0x0000000000000040, $x2:0x0000000000000040, $x8
+  ; CHECK-NEXT:   liveins: $w1, $x0, $x2, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRB renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s8) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   liveins: $x0:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w1 = MOVZWi 1, 0
+  ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.cmpxchg.nostore:
-  ; CHECK-NEXT:   liveins: $x0:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $w1 = ORRWrs $wzr, $wzr, 0
   ; CHECK-NEXT:   CLREX 15, pcsections !0
+  ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
   %res = cmpxchg ptr %ptr, i8 %desired, i8 %new monotonic monotonic, !pcsections !0
   ret { i8, i1 } %res
@@ -1265,34 +1272,37 @@ define { i16, i1 } @cmpxchg_i16(ptr %ptr, i16 %desired, i16 %new) {
   ; CHECK-NEXT:   liveins: $w1, $w2, $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $x8 = ORRXrs $xzr, $x0, 0
+  ; CHECK-NEXT:   renamable $w2 = KILL $w2, implicit-def $x2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1.cmpxchg.start:
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.4(0x04000000)
-  ; CHECK-NEXT:   liveins: $w1, $x2:0x0000000000000040, $x8
+  ; CHECK-NEXT:   liveins: $w1, $x2, $x8
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $w0 = LDXRH renamable $x8, pcsections !0 :: (volatile load (s16) from %ir.ptr)
+  ; CHECK-NEXT:   renamable $w0 = LDXRH renamable $x8, implicit-def $x0, pcsections !0 :: (volatile load (s16) from %ir.ptr)
   ; CHECK-NEXT:   renamable $w9 = ANDWri renamable $w0, 15, pcsections !0
   ; CHECK-NEXT:   dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0
   ; CHECK-NEXT:   Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.cmpxchg.trystore:
   ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.1(0x7c000000)
-  ; CHECK-NEXT:   liveins: $w1, $x0:0x0000000000000040, $x2:0x0000000000000040, $x8
+  ; CHECK-NEXT:   liveins: $w1, $x0, $x2, $x8
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   early-clobber renamable $w9 = STXRH renamable $w2, renamable $x8, pcsections !0 :: (volatile store (s16) into %ir.ptr)
   ; CHECK-NEXT:   CBNZW killed renamable $w9, %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3:
-  ; CHECK-NEXT:   liveins: $x0:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $w1 = MOVZWi 1, 0
+  ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.cmpxchg.nostore:
-  ; CHECK-NEXT:   liveins: $x0:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $w1 = ORRWrs $wzr, $wzr, 0
   ; CHECK-NEXT:   CLREX 15, pcsections !0
+  ; CHECK-NEXT:   $w0 = KILL renamable $w0, implicit killed $x0
   ; CHECK-NEXT:   RET undef $lr, implicit $w0, implicit $w1
   %res = cmpxchg ptr %ptr, i16 %desired, i16 %new monotonic monotonic, !pcsections !0
   ret { i16, i1 } %res

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
index 0157c429120a9e..b681e3b2231174 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -252,6 +252,7 @@ define i32 @udiv_div_by_180(i32 %x)
 ; SDAG-NEXT:    movk w8, #364, lsl #16
 ; SDAG-NEXT:    umull x8, w9, w8
 ; SDAG-NEXT:    lsr x0, x8, #32
+; SDAG-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SDAG-NEXT:    ret
 ;
 ; GISEL-LABEL: udiv_div_by_180:
@@ -261,6 +262,7 @@ define i32 @udiv_div_by_180(i32 %x)
 ; GISEL-NEXT:    movk w8, #364, lsl #16
 ; GISEL-NEXT:    umull x8, w9, w8
 ; GISEL-NEXT:    lsr x0, x8, #32
+; GISEL-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; GISEL-NEXT:    ret
 {
   %truncate = and i32 %x, 255

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/insert-vector-elt-pr63826.ll b/llvm/test/CodeGen/AArch64/GlobalISel/insert-vector-elt-pr63826.ll
index 1422e32c468bf9..e41b25fb2b88f1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/insert-vector-elt-pr63826.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/insert-vector-elt-pr63826.ll
@@ -8,8 +8,10 @@ target triple = "arm64-apple-ios14.5.0"
 define <2 x i16> @pr63826_v2s16(<2 x i16> %vec) {
 ; CHECK-LABEL: pr63826_v2s16:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, #1 ; =0x1
 ; CHECK-NEXT:    mov.s v0[0], w8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %vec1 = insertelement <2 x i16> %vec, i16 1, i32 0
   ret <2 x i16> %vec1
@@ -18,8 +20,10 @@ define <2 x i16> @pr63826_v2s16(<2 x i16> %vec) {
 define <2 x i8> @pr63826_v2s8(<2 x i8> %vec) {
 ; CHECK-LABEL: pr63826_v2s8:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, #1 ; =0x1
 ; CHECK-NEXT:    mov.s v0[0], w8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %vec1 = insertelement <2 x i8> %vec, i8 1, i32 0
   ret <2 x i8> %vec1
@@ -28,8 +32,10 @@ define <2 x i8> @pr63826_v2s8(<2 x i8> %vec) {
 define <4 x i8> @pr63826_v4s8(<4 x i8> %vec) {
 ; CHECK-LABEL: pr63826_v4s8:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, #1 ; =0x1
 ; CHECK-NEXT:    mov.h v0[0], w8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %vec1 = insertelement <4 x i8> %vec, i8 1, i32 0
   ret <4 x i8> %vec1

diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index a711588ab82c18..def4192b0e005d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -330,6 +330,7 @@ entry:
 define i16 @addv_v3i16(<3 x i16> %a) {
 ; CHECK-LABEL: addv_v3i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], wzr
 ; CHECK-NEXT:    addv h0, v0.4h
 ; CHECK-NEXT:    fmov w0, s0
@@ -433,6 +434,9 @@ entry:
 define i64 @addv_v3i64(<3 x i64> %a) {
 ; CHECK-LABEL: addv_v3i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    mov v2.d[1], xzr
 ; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d

diff --git a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
index ddd19e61cdd9bb..4afe3626864401 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
@@ -1035,6 +1035,7 @@ define <2 x double> @test_v1f64(<1 x double> %0, ptr %1) {
 ; CHECK-LABEL: test_v1f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mvni v1.2s, #31, msl #16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll b/llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
index 86cc39696f10ca..52b542790e82d5 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bf16-dotprod-intrinsics.ll
@@ -24,6 +24,7 @@ entry:
 define <2 x float> @test_vbfdot_lane_f32(<2 x float> %r, <4 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfdot_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    bfdot v0.2s, v1.4h, v2.2h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -63,6 +64,7 @@ entry:
 define <4 x float> @test_vbfdotq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfdotq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    bfdot v0.4s, v1.8h, v2.2h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -106,6 +108,7 @@ entry:
 define <4 x float> @test_vbfmlalbq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfmlalbq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    bfmlalb v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -128,6 +131,7 @@ entry:
 define <4 x float> @test_vbfmlaltq_lane_f32(<4 x float> %r, <8 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-LABEL: test_vbfmlaltq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    bfmlalt v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
index 6da3743cf86f5d..9734ab35bd6b2d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll
@@ -14,6 +14,9 @@ define <1 x i8> @test_bitf_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
 ;
 ; CHECK-GI-LABEL: test_bitf_v1i8:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    umov w8, v2.b[0]
 ; CHECK-GI-NEXT:    umov w9, v1.b[0]
 ; CHECK-GI-NEXT:    umov w10, v0.b[0]
@@ -39,6 +42,9 @@ define <1 x i16> @test_bitf_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
 ;
 ; CHECK-GI-LABEL: test_bitf_v1i16:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    umov w8, v2.h[0]
 ; CHECK-GI-NEXT:    umov w9, v1.h[0]
 ; CHECK-GI-NEXT:    umov w10, v0.h[0]
@@ -71,6 +77,7 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
 ; CHECK-GI-NEXT:    and w8, w8, w10
 ; CHECK-GI-NEXT:    orr w8, w9, w8
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %neg = xor <1 x i32> %C, <i32 -1>
   %and = and <1 x i32> %neg, %B

diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
index 28ef5deb3bd38a..45ad4b07ff66f7 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -14,6 +14,9 @@ define <1 x i8> @test_bit_v1i8(<1 x i8> %A, <1 x i8> %B, <1 x i8> %C) {
 ;
 ; CHECK-GI-LABEL: test_bit_v1i8:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    umov w8, v2.b[0]
 ; CHECK-GI-NEXT:    umov w9, v1.b[0]
 ; CHECK-GI-NEXT:    umov w10, v0.b[0]
@@ -39,6 +42,9 @@ define <1 x i16> @test_bit_v1i16(<1 x i16> %A, <1 x i16> %B, <1 x i16> %C) {
 ;
 ; CHECK-GI-LABEL: test_bit_v1i16:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    umov w8, v2.h[0]
 ; CHECK-GI-NEXT:    umov w9, v1.h[0]
 ; CHECK-GI-NEXT:    umov w10, v0.h[0]
@@ -71,6 +77,7 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
 ; CHECK-GI-NEXT:    bic w8, w10, w8
 ; CHECK-GI-NEXT:    orr w8, w9, w8
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %and = and <1 x i32> %C, %B
   %neg = xor <1 x i32> %C, <i32 -1>

diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll b/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
index 4c77090da14c6e..e086ab92421fb5 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
@@ -5,7 +5,11 @@ define <2 x i64> @test_mul_add_2x64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: test_mul_add_2x64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %mul = mul <2 x i64> %b, %c
   %add = add <2 x i64> %a, %mul
@@ -16,7 +20,11 @@ define <1 x i64> @test_mul_add_1x64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) {
 ; CHECK-LABEL: test_mul_add_1x64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %mul = mul <1 x i64> %b, %c
   %add = add <1 x i64> %mul, %a
@@ -27,7 +35,11 @@ define <2 x i64> @test_mul_sub_2x64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: test_mul_sub_2x64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mls z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %mul = mul <2 x i64> %b, %c
   %sub = sub <2 x i64> %a, %mul
@@ -38,6 +50,10 @@ define <2 x i64> @test_mul_sub_2x64_2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c,
 ; CHECK-LABEL: test_mul_sub_2x64_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    movprfx z1, z2
 ; CHECK-NEXT:    mul z1.d, p0/m, z1.d, z3.d
@@ -53,8 +69,13 @@ define <2 x i64> @test_mul_sub_2x64_3(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c,
 ; CHECK-LABEL: test_mul_sub_2x64_3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z3.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %div = sdiv <2 x i64> %a, %b
   %mul = mul <2 x i64> %c, %d
@@ -66,6 +87,8 @@ define <1 x i64> @test_mul_sub_1x64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) {
 ; CHECK-LABEL: test_mul_sub_1x64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    mul z1.d, p0/m, z1.d, z2.d
 ; CHECK-NEXT:    sub d0, d1, d0
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll b/llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll
index 59b98e84aa7d6e..b1b995931ac024 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-add-zext.ll
@@ -4,6 +4,8 @@
 define i16 @test_add_zext_v8i16(<8 x i8> %a, <8 x i8> %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_add_zext_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    uaddlv h0, v0.16b
 ; CHECK-NEXT:    umov w0, v0.h[0]
@@ -18,6 +20,8 @@ define i16 @test_add_zext_v8i16(<8 x i8> %a, <8 x i8> %b) local_unnamed_addr #0
 define i32 @test_add_zext_v4i32(<4 x i16> %a, <4 x i16> %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_add_zext_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    uaddlv s0, v0.8h
 ; CHECK-NEXT:    fmov w0, s0
@@ -32,6 +36,8 @@ define i32 @test_add_zext_v4i32(<4 x i16> %a, <4 x i16> %b) local_unnamed_addr #
 define i64 @test_add_zext_v2i64(<2 x i32> %a, <2 x i32> %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: test_add_zext_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    uaddlv d0, v0.4s
 ; CHECK-NEXT:    fmov x0, d0

diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
index 8a6e7e4841f9cc..36b81d8e495ce6 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll
@@ -84,6 +84,7 @@ entry:
 define <vscale x 2 x i64> @dupsext_v2i8_v2i64(i8 %src, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: dupsext_v2i8_v2i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtb x8, w0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8
@@ -132,6 +133,7 @@ entry:
 define <vscale x 2 x i64> @dupsext_v2i16_v2i64(i16 %src, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: dupsext_v2i16_v2i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxth x8, w0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8
@@ -148,6 +150,7 @@ entry:
 define <vscale x 2 x i64> @dupsext_v2i32_v2i64(i32 %src, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: dupsext_v2i32_v2i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8
@@ -244,6 +247,7 @@ entry:
 define <vscale x 2 x i64> @dupzext_v2i8_v2i64(i8 %src, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: dupzext_v2i8_v2i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0xff
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8
@@ -292,6 +296,7 @@ entry:
 define <vscale x 2 x i64> @dupzext_v2i16_v2i64(i16 %src, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: dupzext_v2i16_v2i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0xffff
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8

diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index 0d450a9a9a5c44..0ce92a20fb3a17 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -106,6 +106,7 @@ define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
 ;
 ; CHECK-GI-LABEL: dupsext_v2i32_v2i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-GI-NEXT:    sxtw x8, w0
 ; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
 ; CHECK-GI-NEXT:    dup v1.2d, x8
@@ -202,6 +203,7 @@ define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    movi v1.2d, #0x0000000000ffff
 ; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-GI-NEXT:    and x8, x0, #0xffff
 ; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NEXT:    dup v1.2d, x8
@@ -406,10 +408,10 @@ define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
 ; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
-; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    sshll v2.8h, v0.8b, #0
 ; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -459,10 +461,10 @@ define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
 ; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
-; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
-; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ushll v2.8h, v0.8b, #0
 ; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI15_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
 ; CHECK-GI-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
index e9e4bca74cd4ee..888aa9d7f9cdcc 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-extract-scalable.ll
@@ -15,6 +15,7 @@ define <vscale x 16 x i8> @dup_extract_nxv16i8_nxv16i8(<vscale x 16 x i8> %data)
 define <vscale x 16 x i8> @dup_extract_nxv16i8_v16i8(<16 x i8> %data) {
 ; CHECK-LABEL: dup_extract_nxv16i8_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.b, z0.b[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <16 x i8> %data, i8 1
@@ -26,6 +27,7 @@ define <vscale x 16 x i8> @dup_extract_nxv16i8_v16i8(<16 x i8> %data) {
 define <vscale x 16 x i8> @dup_extract_nxv16i8_v8i8(<8 x i8> %data) {
 ; CHECK-LABEL: dup_extract_nxv16i8_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.b, z0.b[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x i8> %data, i8 1
@@ -48,6 +50,7 @@ define <vscale x 8 x i16> @dup_extract_nxv8i16_nxv8i16(<vscale x 8 x i16> %data)
 define <vscale x 8 x i16> @dup_extract_nxv8i16_v8i16(<8 x i16> %data) {
 ; CHECK-LABEL: dup_extract_nxv8i16_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x i16> %data, i16 1
@@ -59,6 +62,7 @@ define <vscale x 8 x i16> @dup_extract_nxv8i16_v8i16(<8 x i16> %data) {
 define <vscale x 8 x i16> @dup_extract_nxv8i16_v4i16(<4 x i16> %data) {
 ; CHECK-LABEL: dup_extract_nxv8i16_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x i16> %data, i16 1
@@ -81,6 +85,7 @@ define <vscale x 4 x i32> @dup_extract_nxv4i32_nxv4i32(<vscale x 4 x i32> %data)
 define <vscale x 4 x i32> @dup_extract_nxv4i32_v4i32(<4 x i32> %data) {
 ; CHECK-LABEL: dup_extract_nxv4i32_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x i32> %data, i32 1
@@ -92,6 +97,7 @@ define <vscale x 4 x i32> @dup_extract_nxv4i32_v4i32(<4 x i32> %data) {
 define <vscale x 4 x i32> @dup_extract_nxv4i32_v2i32(<2 x i32> %data) {
 ; CHECK-LABEL: dup_extract_nxv4i32_v2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <2 x i32> %data, i32 1
@@ -114,6 +120,7 @@ define <vscale x 2 x i64> @dup_extract_nxv2i64_nxv2i64(<vscale x 2 x i64> %data)
 define <vscale x 2 x i64> @dup_extract_nxv2i64_v2i64(<2 x i64> %data) {
 ; CHECK-LABEL: dup_extract_nxv2i64_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <2 x i64> %data, i64 1
@@ -171,6 +178,7 @@ define <vscale x 8 x half> @dup_extract_nxv8f16_nxv2f16(<vscale x 2 x half> %dat
 define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv8f16_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x half> %data, i16 1
@@ -182,6 +190,7 @@ define <vscale x 8 x half> @dup_extract_nxv8f16_v8f16(<8 x half> %data) {
 define <vscale x 8 x half> @dup_extract_nxv8f16_v4f16(<4 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv8f16_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x half> %data, i16 1
@@ -227,6 +236,7 @@ define <vscale x 4 x half> @dup_extract_nxv4f16_nxv2f16(<vscale x 2 x half> %dat
 define <vscale x 4 x half> @dup_extract_nxv4f16_v8f16(<8 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv4f16_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x half> %data, i16 1
@@ -238,6 +248,7 @@ define <vscale x 4 x half> @dup_extract_nxv4f16_v8f16(<8 x half> %data) {
 define <vscale x 4 x half> @dup_extract_nxv4f16_v4f16(<4 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv4f16_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x half> %data, i16 1
@@ -282,6 +293,7 @@ define <vscale x 2 x half> @dup_extract_nxv2f16_nxv2f16(<vscale x 2 x half> %dat
 define <vscale x 2 x half> @dup_extract_nxv2f16_v8f16(<8 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv2f16_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x half> %data, i16 1
@@ -293,6 +305,7 @@ define <vscale x 2 x half> @dup_extract_nxv2f16_v8f16(<8 x half> %data) {
 define <vscale x 2 x half> @dup_extract_nxv2f16_v4f16(<4 x half> %data) {
 ; CHECK-LABEL: dup_extract_nxv2f16_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x half> %data, i16 1
@@ -327,6 +340,7 @@ define <vscale x 4 x float> @dup_extract_nxv4f32_nxv2f32(<vscale x 2 x float> %d
 define <vscale x 4 x float> @dup_extract_nxv4f32_v4f32(<4 x float> %data) {
 ; CHECK-LABEL: dup_extract_nxv4f32_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x float> %data, i32 1
@@ -338,6 +352,7 @@ define <vscale x 4 x float> @dup_extract_nxv4f32_v4f32(<4 x float> %data) {
 define <vscale x 4 x float> @dup_extract_nxv4f32_v2f32(<2 x float> %data) {
 ; CHECK-LABEL: dup_extract_nxv4f32_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <2 x float> %data, i32 1
@@ -371,6 +386,7 @@ define <vscale x 2 x float> @dup_extract_nxv2f32_nxv2f32(<vscale x 2 x float> %d
 define <vscale x 2 x float> @dup_extract_nxv2f32_v4f32(<4 x float> %data) {
 ; CHECK-LABEL: dup_extract_nxv2f32_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x float> %data, i32 1
@@ -382,6 +398,7 @@ define <vscale x 2 x float> @dup_extract_nxv2f32_v4f32(<4 x float> %data) {
 define <vscale x 2 x float> @dup_extract_nxv2f32_v2f32(<2 x float> %data) {
 ; CHECK-LABEL: dup_extract_nxv2f32_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <2 x float> %data, i32 1
@@ -404,6 +421,7 @@ define <vscale x 2 x double> @dup_extract_nxv2f64_nxv2f64(<vscale x 2 x double>
 define <vscale x 2 x double> @dup_extract_nxv2f64_v2f64(<2 x double> %data) {
 ; CHECK-LABEL: dup_extract_nxv2f64_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <2 x double> %data, i64 1
@@ -461,6 +479,7 @@ define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_nxv2bf16(<vscale x 2 x bfloat
 define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v8bf16(<8 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv8bf16_v8bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x bfloat> %data, i16 1
@@ -472,6 +491,7 @@ define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v8bf16(<8 x bfloat> %data) {
 define <vscale x 8 x bfloat> @dup_extract_nxv8bf16_v4bf16(<4 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv8bf16_v4bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x bfloat> %data, i16 1
@@ -517,6 +537,7 @@ define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat
 define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v8bf16(<8 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv4bf16_v8bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x bfloat> %data, i16 1
@@ -528,6 +549,7 @@ define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v8bf16(<8 x bfloat> %data) {
 define <vscale x 4 x bfloat> @dup_extract_nxv4bf16_v4bf16(<4 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv4bf16_v4bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x bfloat> %data, i16 1
@@ -572,6 +594,7 @@ define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat
 define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v8bf16(<8 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv2bf16_v8bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <8 x bfloat> %data, i16 1
@@ -583,6 +606,7 @@ define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v8bf16(<8 x bfloat> %data) {
 define <vscale x 2 x bfloat> @dup_extract_nxv2bf16_v4bf16(<4 x bfloat> %data) {
 ; CHECK-LABEL: dup_extract_nxv2bf16_v4bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
 ; CHECK-NEXT:    ret
   %1 = extractelement <4 x bfloat> %data, i16 1

diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index bd8ad9c0161936..63dcafed2320a0 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -13,6 +13,7 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK0-SDAG-LABEL: halfword:
 ; CHECK0-SDAG:       // %bb.0:
 ; CHECK0-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK0-SDAG-NEXT:    ubfx x8, x1, #9, #8
 ; CHECK0-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK0-SDAG-NEXT:    mov x19, x0
@@ -42,6 +43,7 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK3-SDAG-LABEL: halfword:
 ; CHECK3-SDAG:       // %bb.0:
 ; CHECK3-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK3-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK3-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK3-SDAG-NEXT:    mov x19, x0
@@ -81,6 +83,7 @@ define i32 @word(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK0-SDAG-LABEL: word:
 ; CHECK0-SDAG:       // %bb.0:
 ; CHECK0-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK0-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK0-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK0-SDAG-NEXT:    mov x19, x0
@@ -110,6 +113,7 @@ define i32 @word(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK3-SDAG-LABEL: word:
 ; CHECK3-SDAG:       // %bb.0:
 ; CHECK3-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK3-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK3-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK3-SDAG-NEXT:    mov x19, x0
@@ -149,6 +153,7 @@ define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK0-SDAG-LABEL: doubleword:
 ; CHECK0-SDAG:       // %bb.0:
 ; CHECK0-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK0-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK0-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK0-SDAG-NEXT:    mov x19, x0
@@ -178,6 +183,7 @@ define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind {
 ; CHECK3-SDAG-LABEL: doubleword:
 ; CHECK3-SDAG:       // %bb.0:
 ; CHECK3-SDAG-NEXT:    stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; CHECK3-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK3-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK3-SDAG-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK3-SDAG-NEXT:    mov x19, x0
@@ -225,6 +231,7 @@ define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) {
 ; CHECK0-SDAG-NEXT:    .cfi_offset w21, -24
 ; CHECK0-SDAG-NEXT:    .cfi_offset w22, -32
 ; CHECK0-SDAG-NEXT:    .cfi_offset w30, -48
+; CHECK0-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK0-SDAG-NEXT:    ubfx x8, x1, #9, #8
 ; CHECK0-SDAG-NEXT:    mov x19, x0
 ; CHECK0-SDAG-NEXT:    lsl x21, x8, #1
@@ -270,6 +277,7 @@ define i16 @multi_use_half_word(ptr %ctx, i32 %xor72) {
 ; CHECK3-SDAG-NEXT:    .cfi_offset w21, -24
 ; CHECK3-SDAG-NEXT:    .cfi_offset w22, -32
 ; CHECK3-SDAG-NEXT:    .cfi_offset w30, -48
+; CHECK3-SDAG-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK3-SDAG-NEXT:    ubfx x21, x1, #9, #8
 ; CHECK3-SDAG-NEXT:    mov x19, x0
 ; CHECK3-SDAG-NEXT:    ldrh w20, [x0, x21, lsl #1]

diff --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
index 7141f53802bff7..07fbe5d7310f60 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll
@@ -27,8 +27,9 @@ BB:
 define void @f_undef_15(<8 x i64> %a, ptr %dst) {
 ; CHECK-LABEL: f_undef_15:
 ; CHECK:       // %bb.0: // %BB
-; CHECK-NEXT:    mov v1.16b, v0.16b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
 ; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    mov v1.16b, v0.16b
 ; CHECK-NEXT:    st2 { v0.2d, v1.2d }, [x8], #32
 ; CHECK-NEXT:    st2 { v0.2d, v1.2d }, [x8]
 ; CHECK-NEXT:    add x8, x0, #64
@@ -45,17 +46,19 @@ BB:
 define void @f_undef_1(<8 x i64> %a, ptr %dst) {
 ; CHECK-LABEL: f_undef_1:
 ; CHECK:       // %bb.0: // %BB
-; CHECK-NEXT:    mov v4.16b, v2.16b
-; CHECK-NEXT:    mov v5.16b, v0.16b
+; CHECK-NEXT:    mov v16.16b, v0.16b
+; CHECK-NEXT:    mov v5.16b, v2.16b
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q1_q2
+; CHECK-NEXT:    // kill: def $q3 killed $q3 def $q3_q4
 ; CHECK-NEXT:    mov x8, x0
-; CHECK-NEXT:    mov v6.16b, v0.16b
 ; CHECK-NEXT:    mov v2.16b, v1.16b
-; CHECK-NEXT:    st2 { v5.2d, v6.2d }, [x8], #32
-; CHECK-NEXT:    mov v5.16b, v4.16b
+; CHECK-NEXT:    mov v4.16b, v3.16b
+; CHECK-NEXT:    mov v17.16b, v16.16b
+; CHECK-NEXT:    mov v6.16b, v5.16b
+; CHECK-NEXT:    st2 { v16.2d, v17.2d }, [x8], #32
 ; CHECK-NEXT:    st2 { v1.2d, v2.2d }, [x8]
 ; CHECK-NEXT:    add x8, x0, #64
-; CHECK-NEXT:    st2 { v4.2d, v5.2d }, [x8]
-; CHECK-NEXT:    mov v4.16b, v3.16b
+; CHECK-NEXT:    st2 { v5.2d, v6.2d }, [x8]
 ; CHECK-NEXT:    add x8, x0, #96
 ; CHECK-NEXT:    st2 { v3.2d, v4.2d }, [x8]
 ; CHECK-NEXT:    ret
@@ -70,10 +73,11 @@ define void @noundefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) {
 ; CHECK-LABEL: noundefs:
 ; CHECK:       // %bb.0: // %BB
 ; CHECK-NEXT:    mov v5.16b, v2.16b
+; CHECK-NEXT:    // kill: def $q3 killed $q3 def $q2_q3
 ; CHECK-NEXT:    mov v4.16b, v0.16b
-; CHECK-NEXT:    mov v2.16b, v3.16b
+; CHECK-NEXT:    mov v2.16b, v1.16b
 ; CHECK-NEXT:    st2 { v4.4s, v5.4s }, [x0], #32
-; CHECK-NEXT:    st2 { v1.4s, v2.4s }, [x0]
+; CHECK-NEXT:    st2 { v2.4s, v3.4s }, [x0]
 ; CHECK-NEXT:    ret
 BB:
   %S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 undef, i32 14, i32 7, i32 15>
@@ -85,10 +89,11 @@ define void @undefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) {
 ; CHECK-LABEL: undefs:
 ; CHECK:       // %bb.0: // %BB
 ; CHECK-NEXT:    mov v5.16b, v2.16b
+; CHECK-NEXT:    // kill: def $q3 killed $q3 def $q2_q3
 ; CHECK-NEXT:    mov v4.16b, v0.16b
-; CHECK-NEXT:    mov v2.16b, v3.16b
+; CHECK-NEXT:    mov v2.16b, v1.16b
 ; CHECK-NEXT:    st2 { v4.4s, v5.4s }, [x0], #32
-; CHECK-NEXT:    st2 { v1.4s, v2.4s }, [x0]
+; CHECK-NEXT:    st2 { v2.4s, v3.4s }, [x0]
 ; CHECK-NEXT:    ret
 BB:
   %S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 15>

diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
index 16479a526ab0a2..317feb5ad9ad08 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
@@ -6,6 +6,7 @@ define <2 x i16> @test0(ptr %i16_ptr, i64 %inc) {
 ; CHECK-LE-LABEL: test0:
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test0:
@@ -24,6 +25,7 @@ define <2 x i16> @test1(ptr %v2i16_ptr) {
 ; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
 ; CHECK-LE-NEXT:    add x8, x0, #2
 ; CHECK-LE-NEXT:    ld1 { v0.h }[2], [x8]
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test1:
@@ -43,6 +45,7 @@ define <2 x i16> @test2(ptr %i16_ptr, i64 %inc) {
 ; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
 ; CHECK-LE-NEXT:    add x8, x0, x1, lsl #1
 ; CHECK-LE-NEXT:    ld1 { v0.h }[2], [x8]
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test2:
@@ -66,6 +69,7 @@ define <2 x i8> @test3(ptr %v2i8_ptr) {
 ; CHECK-LE-NEXT:    ld1 { v0.b }[0], [x0]
 ; CHECK-LE-NEXT:    add x8, x0, #1
 ; CHECK-LE-NEXT:    ld1 { v0.b }[4], [x8]
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test3:
@@ -84,6 +88,7 @@ define <4 x i8> @test4(ptr %v4i8_ptr) {
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ldr s0, [x0]
 ; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test4:
@@ -104,6 +109,7 @@ define <2 x i32> @fsext_v2i32(ptr %a) {
 ; CHECK-LE-NEXT:    ldrsb w9, [x0, #1]
 ; CHECK-LE-NEXT:    fmov s0, w8
 ; CHECK-LE-NEXT:    mov v0.s[1], w9
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: fsext_v2i32:
@@ -247,6 +253,7 @@ define <2 x i16> @fsext_v2i16(ptr %a) {
 ; CHECK-LE-NEXT:    ldrsb w9, [x0, #1]
 ; CHECK-LE-NEXT:    fmov s0, w8
 ; CHECK-LE-NEXT:    mov v0.s[1], w9
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: fsext_v2i16:
@@ -290,6 +297,7 @@ define <4 x i16> @fsext_v4i16(ptr %a) {
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ldr s0, [x0]
 ; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: fsext_v4i16:
@@ -351,6 +359,7 @@ define <4 x i16> @fzext_v4i16(ptr %a) {
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ldr s0, [x0]
 ; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: fzext_v4i16:
@@ -447,6 +456,7 @@ define <4 x i8> @strict_align_aligned(ptr %v4i8_ptr) "target-features"="+strict-
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    ldr s0, [x0]
 ; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: strict_align_aligned:
@@ -470,6 +480,7 @@ define <4 x i8> @strict_align_unaligned(ptr %v4i8_ptr) "target-features"="+stric
 ; CHECK-LE-NEXT:    ld1 { v0.b }[4], [x8]
 ; CHECK-LE-NEXT:    add x8, x0, #3
 ; CHECK-LE-NEXT:    ld1 { v0.b }[6], [x8]
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: strict_align_unaligned:

diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
index 6f03e14bbbe8ed..fb6575cc0ee83b 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll
@@ -1,11 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o - | FileCheck %s --check-prefix=CHECK-SD
-; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefix=CHECK-GI
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 define void @matrix_mul_unsigned(i32 %N, ptr nocapture %C, ptr nocapture readonly %A, i16 %val) {
 ; CHECK-SD-LABEL: matrix_mul_unsigned:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    dup v0.4h, w3
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
 ; CHECK-SD-NEXT:  .LBB0_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
@@ -90,6 +91,7 @@ define void @matrix_mul_signed(i32 %N, ptr nocapture %C, ptr nocapture readonly
 ; CHECK-SD-LABEL: matrix_mul_signed:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    dup v0.4h, w3
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
 ; CHECK-SD-NEXT:  .LBB1_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
@@ -108,6 +110,7 @@ define void @matrix_mul_signed(i32 %N, ptr nocapture %C, ptr nocapture readonly
 ; CHECK-GI-LABEL: matrix_mul_signed:
 ; CHECK-GI:       // %bb.0: // %vector.header
 ; CHECK-GI-NEXT:    sxth w9, w3
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-GI-NEXT:    sxtw x8, w0
 ; CHECK-GI-NEXT:    dup v0.4s, w9
 ; CHECK-GI-NEXT:    and x8, x8, #0xfffffff8
@@ -175,7 +178,9 @@ define void @matrix_mul_double_shuffle(i32 %N, ptr nocapture %C, ptr nocapture r
 ; CHECK-SD-LABEL: matrix_mul_double_shuffle:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    dup v0.4h, w3
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 killed $x0 def $x0
 ; CHECK-SD-NEXT:  .LBB2_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT:    ldrh w9, [x2], #16
@@ -752,6 +757,7 @@ define void @sink_v2z64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
 ; CHECK-SD-LABEL: sink_v2z64_1:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    mov x8, xzr
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:  .LBB6_1: // %loop
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT:    ldr d1, [x0]
@@ -817,6 +823,7 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) {
 ; CHECK-SD-LABEL: sink_v4i64_1:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    mov x8, xzr
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:  .LBB7_1: // %loop
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-SD-NEXT:    ldr q1, [x0]
@@ -1017,6 +1024,7 @@ define void @matrix_mul_unsigned_and(i32 %N, ptr nocapture %C, ptr nocapture rea
 ; CHECK-SD-LABEL: matrix_mul_unsigned_and:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    dup v0.4h, w3
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
 ; CHECK-SD-NEXT:  .LBB10_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
@@ -1101,6 +1109,7 @@ define void @matrix_mul_unsigned_and_double(i32 %N, ptr nocapture %C, ptr nocapt
 ; CHECK-SD-LABEL: matrix_mul_unsigned_and_double:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    dup v0.8h, w3
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff0
 ; CHECK-SD-NEXT:  .LBB11_1: // %vector.body
 ; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
@@ -1195,6 +1204,7 @@ define void @matrix_mul_signed_and(i32 %N, ptr nocapture %C, ptr nocapture reado
 ; CHECK-SD-LABEL: matrix_mul_signed_and:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    and w9, w3, #0xffff
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff8
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:  .LBB12_1: // %vector.body
@@ -1282,6 +1292,7 @@ define void @matrix_mul_signed_and_double(i32 %N, ptr nocapture %C, ptr nocaptur
 ; CHECK-SD-LABEL: matrix_mul_signed_and_double:
 ; CHECK-SD:       // %bb.0: // %vector.header
 ; CHECK-SD-NEXT:    and w9, w3, #0xffff
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xfffffff0
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:  .LBB13_1: // %vector.body
@@ -1378,3 +1389,6 @@ for.end12:                                        ; preds = %vector.body
 }
 
 declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}

diff --git a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
index 53e07eff1541d6..f7aa57a068a4ce 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-minmaxv.ll
@@ -458,6 +458,7 @@ define i8 @sminv_v2i8(<2 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: sminv_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    sxtb w8, w8
@@ -509,6 +510,7 @@ define i8 @sminv_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: sminv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -573,6 +575,7 @@ define i16 @sminv_v2i16(<2 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: sminv_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    sxth w8, w8
@@ -589,6 +592,7 @@ entry:
 define i16 @sminv_v3i16(<3 x i16> %a) {
 ; CHECK-LABEL: sminv_v3i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    mov v0.h[3], w8
 ; CHECK-NEXT:    sminv h0, v0.4h
@@ -706,7 +710,10 @@ entry:
 define i64 @sminv_v3i64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: sminv_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v2.d[1], x8
 ; CHECK-SD-NEXT:    cmgt v1.2d, v2.2d, v0.2d
@@ -719,7 +726,10 @@ define i64 @sminv_v3i64(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: sminv_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], x8
 ; CHECK-GI-NEXT:    cmgt v1.2d, v2.2d, v0.2d
@@ -800,6 +810,7 @@ define i8 @smaxv_v2i8(<2 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: smaxv_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    sxtb w8, w8
@@ -851,6 +862,7 @@ define i8 @smaxv_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: smaxv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -915,6 +927,7 @@ define i16 @smaxv_v2i16(<2 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: smaxv_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    sxth w8, w8
@@ -931,6 +944,7 @@ entry:
 define i16 @smaxv_v3i16(<3 x i16> %a) {
 ; CHECK-SD-LABEL: smaxv_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w8, #-32768 // =0xffff8000
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
 ; CHECK-SD-NEXT:    smaxv h0, v0.4h
@@ -939,6 +953,7 @@ define i16 @smaxv_v3i16(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: smaxv_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-GI-NEXT:    mov v0.h[3], w8
 ; CHECK-GI-NEXT:    smaxv h0, v0.4h
@@ -1056,7 +1071,10 @@ entry:
 define i64 @smaxv_v3i64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: smaxv_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v2.d[1], x8
 ; CHECK-SD-NEXT:    cmgt v1.2d, v0.2d, v2.2d
@@ -1069,7 +1087,10 @@ define i64 @smaxv_v3i64(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: smaxv_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], x8
 ; CHECK-GI-NEXT:    cmgt v1.2d, v0.2d, v2.2d
@@ -1150,6 +1171,7 @@ define i8 @uminv_v2i8(<2 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: uminv_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    and w8, w8, #0xff
@@ -1199,6 +1221,7 @@ define i8 @uminv_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: uminv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -1263,6 +1286,7 @@ define i16 @uminv_v2i16(<2 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: uminv_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    and w8, w8, #0xffff
@@ -1279,6 +1303,7 @@ entry:
 define i16 @uminv_v3i16(<3 x i16> %a) {
 ; CHECK-SD-LABEL: uminv_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w8, #-1 // =0xffffffff
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
 ; CHECK-SD-NEXT:    uminv h0, v0.4h
@@ -1287,6 +1312,7 @@ define i16 @uminv_v3i16(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: uminv_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, #65535 // =0xffff
 ; CHECK-GI-NEXT:    mov v0.h[3], w8
 ; CHECK-GI-NEXT:    uminv h0, v0.4h
@@ -1404,7 +1430,10 @@ entry:
 define i64 @uminv_v3i64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: uminv_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov x8, #-1 // =0xffffffffffffffff
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v2.d[1], x8
 ; CHECK-SD-NEXT:    cmhi v1.2d, v2.2d, v0.2d
@@ -1417,7 +1446,10 @@ define i64 @uminv_v3i64(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: uminv_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov x8, #-1 // =0xffffffffffffffff
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], x8
 ; CHECK-GI-NEXT:    cmhi v1.2d, v2.2d, v0.2d
@@ -1498,6 +1530,7 @@ define i8 @umaxv_v2i8(<2 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: umaxv_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    and w8, w8, #0xff
@@ -1547,6 +1580,7 @@ define i8 @umaxv_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: umaxv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -1611,6 +1645,7 @@ define i16 @umaxv_v2i16(<2 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: umaxv_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    and w8, w8, #0xffff
@@ -1627,6 +1662,7 @@ entry:
 define i16 @umaxv_v3i16(<3 x i16> %a) {
 ; CHECK-SD-LABEL: umaxv_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.h[3], wzr
 ; CHECK-SD-NEXT:    umaxv h0, v0.4h
 ; CHECK-SD-NEXT:    fmov w0, s0
@@ -1634,6 +1670,7 @@ define i16 @umaxv_v3i16(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: umaxv_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, #0 // =0x0
 ; CHECK-GI-NEXT:    mov v0.h[3], w8
 ; CHECK-GI-NEXT:    umaxv h0, v0.4h
@@ -1750,7 +1787,10 @@ entry:
 define i64 @umaxv_v3i64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: umaxv_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v3.16b, v2.16b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v3.d[1], xzr
 ; CHECK-SD-NEXT:    cmhi v3.2d, v0.2d, v3.2d
@@ -1764,6 +1804,9 @@ define i64 @umaxv_v3i64(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: umaxv_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], xzr
 ; CHECK-GI-NEXT:    cmhi v1.2d, v0.2d, v2.2d

diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
index 37c4ed3607f047..bccb70ccd5c036 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops-mte.ll
@@ -38,7 +38,7 @@ entry:
 define ptr @memset_tagged_1_zeroval(ptr %dst, i64 %size) {
 ; GISel-O0-LABEL: memset_tagged_1_zeroval:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #1 // =0x1
+; GISel-O0-NEXT:    mov w8, #1
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    mov x9, xzr
 ; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
@@ -48,7 +48,7 @@ define ptr @memset_tagged_1_zeroval(ptr %dst, i64 %size) {
 ;
 ; GISel-LABEL: memset_tagged_1_zeroval:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #1 // =0x1
+; GISel-NEXT:    mov w8, #1
 ; GISel-NEXT:    setgp [x0]!, x8!, xzr
 ; GISel-NEXT:    setgm [x0]!, x8!, xzr
 ; GISel-NEXT:    setge [x0]!, x8!, xzr
@@ -56,7 +56,7 @@ define ptr @memset_tagged_1_zeroval(ptr %dst, i64 %size) {
 ;
 ; SDAG-LABEL: memset_tagged_1_zeroval:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #1 // =0x1
+; SDAG-NEXT:    mov w8, #1
 ; SDAG-NEXT:    setgp [x0]!, x8!, xzr
 ; SDAG-NEXT:    setgm [x0]!, x8!, xzr
 ; SDAG-NEXT:    setge [x0]!, x8!, xzr
@@ -69,7 +69,7 @@ entry:
 define ptr @memset_tagged_10_zeroval(ptr %dst, i64 %size) {
 ; GISel-O0-LABEL: memset_tagged_10_zeroval:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #10 // =0xa
+; GISel-O0-NEXT:    mov w8, #10
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    mov x9, xzr
 ; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
@@ -79,7 +79,7 @@ define ptr @memset_tagged_10_zeroval(ptr %dst, i64 %size) {
 ;
 ; GISel-LABEL: memset_tagged_10_zeroval:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #10 // =0xa
+; GISel-NEXT:    mov w8, #10
 ; GISel-NEXT:    setgp [x0]!, x8!, xzr
 ; GISel-NEXT:    setgm [x0]!, x8!, xzr
 ; GISel-NEXT:    setge [x0]!, x8!, xzr
@@ -87,7 +87,7 @@ define ptr @memset_tagged_10_zeroval(ptr %dst, i64 %size) {
 ;
 ; SDAG-LABEL: memset_tagged_10_zeroval:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #10 // =0xa
+; SDAG-NEXT:    mov w8, #10
 ; SDAG-NEXT:    setgp [x0]!, x8!, xzr
 ; SDAG-NEXT:    setgm [x0]!, x8!, xzr
 ; SDAG-NEXT:    setge [x0]!, x8!, xzr
@@ -100,7 +100,7 @@ entry:
 define ptr @memset_tagged_10000_zeroval(ptr %dst, i64 %size) {
 ; GISel-O0-LABEL: memset_tagged_10000_zeroval:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #10000 // =0x2710
+; GISel-O0-NEXT:    mov w8, #10000
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    mov x9, xzr
 ; GISel-O0-NEXT:    setgp [x0]!, x8!, x9
@@ -110,7 +110,7 @@ define ptr @memset_tagged_10000_zeroval(ptr %dst, i64 %size) {
 ;
 ; GISel-LABEL: memset_tagged_10000_zeroval:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #10000 // =0x2710
+; GISel-NEXT:    mov w8, #10000
 ; GISel-NEXT:    setgp [x0]!, x8!, xzr
 ; GISel-NEXT:    setgm [x0]!, x8!, xzr
 ; GISel-NEXT:    setge [x0]!, x8!, xzr
@@ -118,7 +118,7 @@ define ptr @memset_tagged_10000_zeroval(ptr %dst, i64 %size) {
 ;
 ; SDAG-LABEL: memset_tagged_10000_zeroval:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #10000 // =0x2710
+; SDAG-NEXT:    mov w8, #10000
 ; SDAG-NEXT:    setgp [x0]!, x8!, xzr
 ; SDAG-NEXT:    setgm [x0]!, x8!, xzr
 ; SDAG-NEXT:    setge [x0]!, x8!, xzr
@@ -169,6 +169,7 @@ define ptr @memset_tagged_0(ptr %dst, i64 %size, i32 %value) {
 ; GISel-LABEL: memset_tagged_0:
 ; GISel:       // %bb.0: // %entry
 ; GISel-NEXT:    mov x8, xzr
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-NEXT:    setgp [x0]!, x8!, x2
 ; GISel-NEXT:    setgm [x0]!, x8!, x2
 ; GISel-NEXT:    setge [x0]!, x8!, x2
@@ -177,6 +178,7 @@ define ptr @memset_tagged_0(ptr %dst, i64 %size, i32 %value) {
 ; SDAG-LABEL: memset_tagged_0:
 ; SDAG:       // %bb.0: // %entry
 ; SDAG-NEXT:    mov x8, xzr
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-NEXT:    setgp [x0]!, x8!, x2
 ; SDAG-NEXT:    setgm [x0]!, x8!, x2
 ; SDAG-NEXT:    setge [x0]!, x8!, x2
@@ -190,7 +192,7 @@ entry:
 define ptr @memset_tagged_1(ptr %dst, i64 %size, i32 %value) {
 ; GISel-O0-LABEL: memset_tagged_1:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #1 // =0x1
+; GISel-O0-NEXT:    mov w8, #1
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    // implicit-def: $x9
 ; GISel-O0-NEXT:    mov w9, w2
@@ -201,7 +203,8 @@ define ptr @memset_tagged_1(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-LABEL: memset_tagged_1:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #1 // =0x1
+; GISel-NEXT:    mov w8, #1
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-NEXT:    setgp [x0]!, x8!, x2
 ; GISel-NEXT:    setgm [x0]!, x8!, x2
 ; GISel-NEXT:    setge [x0]!, x8!, x2
@@ -209,7 +212,8 @@ define ptr @memset_tagged_1(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-LABEL: memset_tagged_1:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #1 // =0x1
+; SDAG-NEXT:    mov w8, #1
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-NEXT:    setgp [x0]!, x8!, x2
 ; SDAG-NEXT:    setgm [x0]!, x8!, x2
 ; SDAG-NEXT:    setge [x0]!, x8!, x2
@@ -223,7 +227,7 @@ entry:
 define ptr @memset_tagged_10(ptr %dst, i64 %size, i32 %value) {
 ; GISel-O0-LABEL: memset_tagged_10:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #10 // =0xa
+; GISel-O0-NEXT:    mov w8, #10
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    // implicit-def: $x9
 ; GISel-O0-NEXT:    mov w9, w2
@@ -234,7 +238,8 @@ define ptr @memset_tagged_10(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-LABEL: memset_tagged_10:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #10 // =0xa
+; GISel-NEXT:    mov w8, #10
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-NEXT:    setgp [x0]!, x8!, x2
 ; GISel-NEXT:    setgm [x0]!, x8!, x2
 ; GISel-NEXT:    setge [x0]!, x8!, x2
@@ -242,7 +247,8 @@ define ptr @memset_tagged_10(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-LABEL: memset_tagged_10:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #10 // =0xa
+; SDAG-NEXT:    mov w8, #10
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-NEXT:    setgp [x0]!, x8!, x2
 ; SDAG-NEXT:    setgm [x0]!, x8!, x2
 ; SDAG-NEXT:    setge [x0]!, x8!, x2
@@ -256,7 +262,7 @@ entry:
 define ptr @memset_tagged_10000(ptr %dst, i64 %size, i32 %value) {
 ; GISel-O0-LABEL: memset_tagged_10000:
 ; GISel-O0:       // %bb.0: // %entry
-; GISel-O0-NEXT:    mov w8, #10000 // =0x2710
+; GISel-O0-NEXT:    mov w8, #10000
 ; GISel-O0-NEXT:    // kill: def $x8 killed $w8
 ; GISel-O0-NEXT:    // implicit-def: $x9
 ; GISel-O0-NEXT:    mov w9, w2
@@ -267,7 +273,8 @@ define ptr @memset_tagged_10000(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-LABEL: memset_tagged_10000:
 ; GISel:       // %bb.0: // %entry
-; GISel-NEXT:    mov w8, #10000 // =0x2710
+; GISel-NEXT:    mov w8, #10000
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-NEXT:    setgp [x0]!, x8!, x2
 ; GISel-NEXT:    setgm [x0]!, x8!, x2
 ; GISel-NEXT:    setge [x0]!, x8!, x2
@@ -275,7 +282,8 @@ define ptr @memset_tagged_10000(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-LABEL: memset_tagged_10000:
 ; SDAG:       // %bb.0: // %entry
-; SDAG-NEXT:    mov w8, #10000 // =0x2710
+; SDAG-NEXT:    mov w8, #10000
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-NEXT:    setgp [x0]!, x8!, x2
 ; SDAG-NEXT:    setgm [x0]!, x8!, x2
 ; SDAG-NEXT:    setge [x0]!, x8!, x2
@@ -298,6 +306,7 @@ define ptr @memset_tagged_size(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-LABEL: memset_tagged_size:
 ; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-NEXT:    setgp [x0]!, x1!, x2
 ; GISel-NEXT:    setgm [x0]!, x1!, x2
 ; GISel-NEXT:    setge [x0]!, x1!, x2
@@ -305,6 +314,7 @@ define ptr @memset_tagged_size(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-LABEL: memset_tagged_size:
 ; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-NEXT:    setgp [x0]!, x1!, x2
 ; SDAG-NEXT:    setgm [x0]!, x1!, x2
 ; SDAG-NEXT:    setge [x0]!, x1!, x2
@@ -327,6 +337,7 @@ define ptr @memset_tagged_size_aligned(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-LABEL: memset_tagged_size_aligned:
 ; GISel:       // %bb.0: // %entry
+; GISel-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-NEXT:    setgp [x0]!, x1!, x2
 ; GISel-NEXT:    setgm [x0]!, x1!, x2
 ; GISel-NEXT:    setge [x0]!, x1!, x2
@@ -334,6 +345,7 @@ define ptr @memset_tagged_size_aligned(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-LABEL: memset_tagged_size_aligned:
 ; SDAG:       // %bb.0: // %entry
+; SDAG-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-NEXT:    setgp [x0]!, x1!, x2
 ; SDAG-NEXT:    setgm [x0]!, x1!, x2
 ; SDAG-NEXT:    setge [x0]!, x1!, x2

diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
index ed745c12a71963..ff7872c922e32f 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -432,6 +432,7 @@ define void @memset_10(ptr %dst, i32 %value) {
 ;
 ; GISel-WITHOUT-MOPS-O3-LABEL: memset_10:
 ; GISel-WITHOUT-MOPS-O3:       // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; GISel-WITHOUT-MOPS-O3-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; GISel-WITHOUT-MOPS-O3-NEXT:    and x9, x1, #0xff
 ; GISel-WITHOUT-MOPS-O3-NEXT:    mul x8, x9, x8
@@ -453,6 +454,7 @@ define void @memset_10(ptr %dst, i32 %value) {
 ;
 ; GISel-MOPS-O3-LABEL: memset_10:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; GISel-MOPS-O3-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; GISel-MOPS-O3-NEXT:    and x9, x1, #0xff
 ; GISel-MOPS-O3-NEXT:    mul x8, x9, x8
@@ -462,6 +464,7 @@ define void @memset_10(ptr %dst, i32 %value) {
 ;
 ; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10:
 ; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    and x9, x1, #0xff
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    mul x8, x9, x8
@@ -471,6 +474,7 @@ define void @memset_10(ptr %dst, i32 %value) {
 ;
 ; SDAG-MOPS-O2-LABEL: memset_10:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-MOPS-O2-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; SDAG-MOPS-O2-NEXT:    and x9, x1, #0xff
 ; SDAG-MOPS-O2-NEXT:    mul x8, x9, x8
@@ -519,6 +523,7 @@ define void @memset_10_volatile(ptr %dst, i32 %value) {
 ; GISel-MOPS-O3-LABEL: memset_10_volatile:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
 ; GISel-MOPS-O3-NEXT:    mov w8, #10 // =0xa
+; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
@@ -526,6 +531,7 @@ define void @memset_10_volatile(ptr %dst, i32 %value) {
 ;
 ; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_volatile:
 ; SDAG-WITHOUT-MOPS-O2:       // %bb.0: // %entry
+; SDAG-WITHOUT-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    and x9, x1, #0xff
 ; SDAG-WITHOUT-MOPS-O2-NEXT:    mul x8, x9, x8
@@ -535,6 +541,7 @@ define void @memset_10_volatile(ptr %dst, i32 %value) {
 ;
 ; SDAG-MOPS-O2-LABEL: memset_10_volatile:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-MOPS-O2-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
 ; SDAG-MOPS-O2-NEXT:    and x9, x1, #0xff
 ; SDAG-MOPS-O2-NEXT:    mul x8, x9, x8
@@ -583,6 +590,7 @@ define void @memset_10000(ptr %dst, i32 %value) {
 ; GISel-MOPS-O3-LABEL: memset_10000:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
 ; GISel-MOPS-O3-NEXT:    mov w8, #10000 // =0x2710
+; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
@@ -601,6 +609,7 @@ define void @memset_10000(ptr %dst, i32 %value) {
 ; SDAG-MOPS-O2-LABEL: memset_10000:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
 ; SDAG-MOPS-O2-NEXT:    mov w8, #10000 // =0x2710
+; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-MOPS-O2-NEXT:    setp [x0]!, x8!, x1
 ; SDAG-MOPS-O2-NEXT:    setm [x0]!, x8!, x1
 ; SDAG-MOPS-O2-NEXT:    sete [x0]!, x8!, x1
@@ -647,6 +656,7 @@ define void @memset_10000_volatile(ptr %dst, i32 %value) {
 ; GISel-MOPS-O3-LABEL: memset_10000_volatile:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
 ; GISel-MOPS-O3-NEXT:    mov w8, #10000 // =0x2710
+; GISel-MOPS-O3-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; GISel-MOPS-O3-NEXT:    setp [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x8!, x1
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x8!, x1
@@ -665,6 +675,7 @@ define void @memset_10000_volatile(ptr %dst, i32 %value) {
 ; SDAG-MOPS-O2-LABEL: memset_10000_volatile:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
 ; SDAG-MOPS-O2-NEXT:    mov w8, #10000 // =0x2710
+; SDAG-MOPS-O2-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; SDAG-MOPS-O2-NEXT:    setp [x0]!, x8!, x1
 ; SDAG-MOPS-O2-NEXT:    setm [x0]!, x8!, x1
 ; SDAG-MOPS-O2-NEXT:    sete [x0]!, x8!, x1
@@ -713,6 +724,7 @@ define void @memset_size(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-MOPS-O3-LABEL: memset_size:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-MOPS-O3-NEXT:    setp [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, x2
@@ -732,6 +744,7 @@ define void @memset_size(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-MOPS-O2-LABEL: memset_size:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-MOPS-O2-NEXT:    setp [x0]!, x1!, x2
 ; SDAG-MOPS-O2-NEXT:    setm [x0]!, x1!, x2
 ; SDAG-MOPS-O2-NEXT:    sete [x0]!, x1!, x2
@@ -780,6 +793,7 @@ define void @memset_size_volatile(ptr %dst, i64 %size, i32 %value) {
 ;
 ; GISel-MOPS-O3-LABEL: memset_size_volatile:
 ; GISel-MOPS-O3:       // %bb.0: // %entry
+; GISel-MOPS-O3-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; GISel-MOPS-O3-NEXT:    setp [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    setm [x0]!, x1!, x2
 ; GISel-MOPS-O3-NEXT:    sete [x0]!, x1!, x2
@@ -799,6 +813,7 @@ define void @memset_size_volatile(ptr %dst, i64 %size, i32 %value) {
 ;
 ; SDAG-MOPS-O2-LABEL: memset_size_volatile:
 ; SDAG-MOPS-O2:       // %bb.0: // %entry
+; SDAG-MOPS-O2-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; SDAG-MOPS-O2-NEXT:    setp [x0]!, x1!, x2
 ; SDAG-MOPS-O2-NEXT:    setm [x0]!, x1!, x2
 ; SDAG-MOPS-O2-NEXT:    sete [x0]!, x1!, x2

diff --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
index 8cf86903a49262..c9864a357186dd 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll
@@ -88,6 +88,7 @@ define i64 @smull2(i64 %x, i32 %y) {
 ;
 ; CHECK-GI-LABEL: smull2:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxtw x8, w0
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -108,6 +109,7 @@ define i64 @smull2_commuted(i64 %x, i32 %y) {
 ;
 ; CHECK-GI-LABEL: smull2_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxtw x8, w0
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x9, x8
@@ -123,6 +125,7 @@ entry:
 define i64 @smull_ldrsb_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsb_b:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -130,6 +133,7 @@ define i64 @smull_ldrsb_b(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsb_b:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -145,6 +149,7 @@ entry:
 define i64 @smull_ldrsb_b_commuted(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsb_b_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smull x0, w9, w8
@@ -152,6 +157,7 @@ define i64 @smull_ldrsb_b_commuted(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsb_b_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mul x0, x9, x8
@@ -167,6 +173,7 @@ entry:
 define i64 @smull_ldrsb_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsb_h:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -174,6 +181,7 @@ define i64 @smull_ldrsb_h(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsb_h:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -195,6 +203,7 @@ define i64 @smull_ldrsb_w(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsb_w:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -210,6 +219,7 @@ entry:
 define i64 @smull_ldrsh_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsh_b:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsh x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -217,6 +227,7 @@ define i64 @smull_ldrsh_b(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsh_b:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -232,6 +243,7 @@ entry:
 define i64 @smull_ldrsh_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsh_h:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsh x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -239,6 +251,7 @@ define i64 @smull_ldrsh_h(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsh_h:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -254,6 +267,7 @@ entry:
 define i64 @smull_ldrsh_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsh_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsh x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smull x0, w9, w8
@@ -261,6 +275,7 @@ define i64 @smull_ldrsh_h_commuted(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsh_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mul x0, x9, x8
@@ -282,6 +297,7 @@ define i64 @smull_ldrsh_w(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsh_w:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -297,6 +313,7 @@ entry:
 define i64 @smull_ldrsw_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsw_b:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -304,6 +321,7 @@ define i64 @smull_ldrsw_b(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsw_b:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -319,6 +337,7 @@ entry:
 define i64 @smull_ldrsw_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsw_h:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -326,6 +345,7 @@ define i64 @smull_ldrsw_h(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsw_h:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -347,6 +367,7 @@ define i64 @smull_ldrsw_w(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsw_w:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -368,6 +389,7 @@ define i64 @smull_ldrsw_w_commuted(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_ldrsw_w_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mul x0, x9, x8
@@ -383,6 +405,8 @@ entry:
 define i64 @smull_sext_bb(i8 %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_sext_bb:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    sxtb x8, w0
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smull x0, w8, w9
@@ -390,6 +414,8 @@ define i64 @smull_sext_bb(i8 %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smull_sext_bb:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxtb x8, w0
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -442,6 +468,7 @@ define i64 @smull_ldrsw_zexth(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsw_zexth:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    smull x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -449,6 +476,7 @@ define i64 @smull_ldrsw_zexth(ptr %x0, i16 %x1) {
 ; CHECK-GI-LABEL: smull_ldrsw_zexth:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    mul x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -464,6 +492,7 @@ define i64 @smull_ldrsw_zextb(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsw_zextb:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    smull x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -471,6 +500,7 @@ define i64 @smull_ldrsw_zextb(ptr %x0, i8 %x1) {
 ; CHECK-GI-LABEL: smull_ldrsw_zextb:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    mul x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -486,6 +516,7 @@ define i64 @smull_ldrsw_zextb_commuted(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smull_ldrsw_zextb_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    smull x0, w9, w8
 ; CHECK-SD-NEXT:    ret
@@ -493,6 +524,7 @@ define i64 @smull_ldrsw_zextb_commuted(ptr %x0, i8 %x1) {
 ; CHECK-GI-LABEL: smull_ldrsw_zextb_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    mul x0, x9, x8
 ; CHECK-GI-NEXT:    ret
@@ -507,6 +539,7 @@ entry:
 define i64 @smaddl_ldrsb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_ldrsb_h:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smaddl x0, w8, w9, x2
@@ -514,6 +547,7 @@ define i64 @smaddl_ldrsb_h(ptr %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsb_h:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
@@ -530,6 +564,7 @@ entry:
 define i64 @smaddl_ldrsb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_ldrsb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smaddl x0, w9, w8, x2
@@ -537,6 +572,7 @@ define i64 @smaddl_ldrsb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    madd x0, x9, x8, x2
@@ -559,6 +595,7 @@ define i64 @smaddl_ldrsh_w(ptr %x0, i32 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsh_w:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
@@ -581,6 +618,7 @@ define i64 @smaddl_ldrsh_w_commuted(ptr %x0, i32 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsh_w_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    madd x0, x9, x8, x2
@@ -597,6 +635,7 @@ entry:
 define i64 @smaddl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_ldrsw_b:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smaddl x0, w8, w9, x2
@@ -604,6 +643,7 @@ define i64 @smaddl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsw_b:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
@@ -619,6 +659,7 @@ define i64 @smaddl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 define i64 @smaddl_ldrsw_b_commuted(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_ldrsw_b_commuted:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smaddl x0, w9, w8, x2
@@ -626,6 +667,7 @@ define i64 @smaddl_ldrsw_b_commuted(ptr %x0, i8 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_ldrsw_b_commuted:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    madd x0, x9, x8, x2
@@ -665,6 +707,8 @@ entry:
 define i64 @smaddl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_sext_hh:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    sxth x8, w0
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smaddl x0, w8, w9, x2
@@ -672,6 +716,8 @@ define i64 @smaddl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smaddl_sext_hh:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxth x8, w0
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
@@ -711,6 +757,7 @@ define i64 @smaddl_ldrsw_zextb(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smaddl_ldrsw_zextb:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    smaddl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -718,6 +765,7 @@ define i64 @smaddl_ldrsw_zextb(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: smaddl_ldrsw_zextb:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -733,6 +781,7 @@ entry:
 define i64 @smnegl_ldrsb_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smnegl_ldrsb_h:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smnegl x0, w8, w9
@@ -740,6 +789,7 @@ define i64 @smnegl_ldrsb_h(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsb_h:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
@@ -756,6 +806,7 @@ entry:
 define i64 @smnegl_ldrsb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smnegl_ldrsb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smnegl x0, w9, w8
@@ -763,6 +814,7 @@ define i64 @smnegl_ldrsb_h_commuted(ptr %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x9, x8
@@ -785,6 +837,7 @@ define i64 @smnegl_ldrsh_w(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsh_w:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
@@ -807,6 +860,7 @@ define i64 @smnegl_ldrsh_w_commuted(ptr %x0, i32 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsh_w_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x9, x8
@@ -823,6 +877,7 @@ entry:
 define i64 @smnegl_ldrsw_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smnegl_ldrsw_b:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smnegl x0, w8, w9
@@ -830,6 +885,7 @@ define i64 @smnegl_ldrsw_b(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsw_b:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
@@ -845,6 +901,7 @@ define i64 @smnegl_ldrsw_b(ptr %x0, i8 %x1) {
 define i64 @smnegl_ldrsw_b_commuted(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smnegl_ldrsw_b_commuted:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smnegl x0, w9, w8
@@ -852,6 +909,7 @@ define i64 @smnegl_ldrsw_b_commuted(ptr %x0, i8 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_ldrsw_b_commuted:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x9, x8
@@ -891,6 +949,8 @@ entry:
 define i64 @smnegl_sext_hh(i16 %x0, i16 %x1) {
 ; CHECK-SD-LABEL: smnegl_sext_hh:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    sxth x8, w0
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smnegl x0, w8, w9
@@ -898,6 +958,8 @@ define i64 @smnegl_sext_hh(i16 %x0, i16 %x1) {
 ;
 ; CHECK-GI-LABEL: smnegl_sext_hh:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxth x8, w0
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
@@ -937,6 +999,7 @@ define i64 @smnegl_ldrsw_zextb(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: smnegl_ldrsw_zextb:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    smnegl x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -944,6 +1007,7 @@ define i64 @smnegl_ldrsw_zextb(ptr %x0, i8 %x1) {
 ; CHECK-GI-LABEL: smnegl_ldrsw_zextb:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -959,6 +1023,7 @@ entry:
 define i64 @smsubl_ldrsb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_ldrsb_h:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smsubl x0, w8, w9, x2
@@ -966,6 +1031,7 @@ define i64 @smsubl_ldrsb_h(ptr %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsb_h:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
@@ -982,6 +1048,7 @@ entry:
 define i64 @smsubl_ldrsb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_ldrsb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsb x8, [x0]
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smsubl x0, w9, w8, x2
@@ -989,6 +1056,7 @@ define i64 @smsubl_ldrsb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsb x8, [x0]
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    msub x0, x9, x8, x2
@@ -1011,6 +1079,7 @@ define i64 @smsubl_ldrsh_w(ptr %x0, i32 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsh_w:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
@@ -1033,6 +1102,7 @@ define i64 @smsubl_ldrsh_w_commuted(ptr %x0, i32 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsh_w_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsh x8, [x0]
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    msub x0, x9, x8, x2
@@ -1049,6 +1119,7 @@ entry:
 define i64 @smsubl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_ldrsw_b:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smsubl x0, w8, w9, x2
@@ -1056,6 +1127,7 @@ define i64 @smsubl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsw_b:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
@@ -1071,6 +1143,7 @@ define i64 @smsubl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) {
 define i64 @smsubl_ldrsw_b_commuted(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_ldrsw_b_commuted:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
 ; CHECK-SD-NEXT:    sxtb x9, w1
 ; CHECK-SD-NEXT:    smsubl x0, w9, w8, x2
@@ -1078,6 +1151,7 @@ define i64 @smsubl_ldrsw_b_commuted(ptr %x0, i8 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_ldrsw_b_commuted:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    msub x0, x9, x8, x2
@@ -1117,6 +1191,8 @@ entry:
 define i64 @smsubl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_sext_hh:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    sxth x8, w0
 ; CHECK-SD-NEXT:    sxth x9, w1
 ; CHECK-SD-NEXT:    smsubl x0, w8, w9, x2
@@ -1124,6 +1200,8 @@ define i64 @smsubl_sext_hh(i16 %x0, i16 %x1, i64 %x2) {
 ;
 ; CHECK-GI-LABEL: smsubl_sext_hh:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    sxth x8, w0
 ; CHECK-GI-NEXT:    sxth x9, w1
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
@@ -1163,6 +1241,7 @@ define i64 @smsubl_ldrsw_zextb(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: smsubl_ldrsw_zextb:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrsw x8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    smsubl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -1170,6 +1249,7 @@ define i64 @smsubl_ldrsw_zextb(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: smsubl_ldrsw_zextb:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrsw x8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -1185,6 +1265,7 @@ entry:
 define i64 @smull_sext_ashr31(i32 %a, i64 %b) nounwind {
 ; CHECK-LABEL: smull_sext_ashr31:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    asr x9, x1, #31
 ; CHECK-NEXT:    mul x0, x8, x9
@@ -1205,6 +1286,7 @@ define i64 @smull_sext_ashr32(i32 %a, i64 %b) nounwind {
 ;
 ; CHECK-GI-LABEL: smull_sext_ashr32:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-GI-NEXT:    sxtw x8, w0
 ; CHECK-GI-NEXT:    asr x9, x1, #32
 ; CHECK-GI-NEXT:    mul x0, x8, x9
@@ -1256,6 +1338,7 @@ define i64 @umull_ldrb_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: umull_ldrb_h:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umull x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -1263,6 +1346,7 @@ define i64 @umull_ldrb_h(ptr %x0, i16 %x1) {
 ; CHECK-GI-LABEL: umull_ldrb_h:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    mul x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -1278,6 +1362,7 @@ define i64 @umull_ldrb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: umull_ldrb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umull x0, w9, w8
 ; CHECK-SD-NEXT:    ret
@@ -1285,6 +1370,7 @@ define i64 @umull_ldrb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-GI-LABEL: umull_ldrb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    mul x0, x9, x8
 ; CHECK-GI-NEXT:    ret
@@ -1321,6 +1407,7 @@ define i64 @umull_ldr_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: umull_ldr_b:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    umull x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -1328,6 +1415,7 @@ define i64 @umull_ldr_b(ptr %x0, i8 %x1) {
 ; CHECK-GI-LABEL: umull_ldr_b:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    mul x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -1408,6 +1496,7 @@ define i64 @umaddl_ldrb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umaddl_ldrb_h:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umaddl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -1415,6 +1504,7 @@ define i64 @umaddl_ldrb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umaddl_ldrb_h:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -1431,6 +1521,7 @@ define i64 @umaddl_ldrb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umaddl_ldrb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umaddl x0, w9, w8, x2
 ; CHECK-SD-NEXT:    ret
@@ -1438,6 +1529,7 @@ define i64 @umaddl_ldrb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umaddl_ldrb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    madd x0, x9, x8, x2
 ; CHECK-GI-NEXT:    ret
@@ -1476,6 +1568,7 @@ define i64 @umaddl_ldr_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umaddl_ldr_b:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    umaddl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -1483,6 +1576,7 @@ define i64 @umaddl_ldr_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umaddl_ldr_b:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    madd x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -1567,6 +1661,7 @@ define i64 @umnegl_ldrb_h(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: umnegl_ldrb_h:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umnegl x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -1574,6 +1669,7 @@ define i64 @umnegl_ldrb_h(ptr %x0, i16 %x1) {
 ; CHECK-GI-LABEL: umnegl_ldrb_h:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -1590,6 +1686,7 @@ define i64 @umnegl_ldrb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-SD-LABEL: umnegl_ldrb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umnegl x0, w9, w8
 ; CHECK-SD-NEXT:    ret
@@ -1597,6 +1694,7 @@ define i64 @umnegl_ldrb_h_commuted(ptr %x0, i16 %x1) {
 ; CHECK-GI-LABEL: umnegl_ldrb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    mneg x0, x9, x8
 ; CHECK-GI-NEXT:    ret
@@ -1635,6 +1733,7 @@ define i64 @umnegl_ldr_b(ptr %x0, i8 %x1) {
 ; CHECK-SD-LABEL: umnegl_ldr_b:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    umnegl x0, w8, w9
 ; CHECK-SD-NEXT:    ret
@@ -1642,6 +1741,7 @@ define i64 @umnegl_ldr_b(ptr %x0, i8 %x1) {
 ; CHECK-GI-LABEL: umnegl_ldr_b:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    mneg x0, x8, x9
 ; CHECK-GI-NEXT:    ret
@@ -1726,6 +1826,7 @@ define i64 @umsubl_ldrb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umsubl_ldrb_h:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umsubl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -1733,6 +1834,7 @@ define i64 @umsubl_ldrb_h(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umsubl_ldrb_h:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -1749,6 +1851,7 @@ define i64 @umsubl_ldrb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umsubl_ldrb_h_commuted:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldrb w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xffff
 ; CHECK-SD-NEXT:    umsubl x0, w9, w8, x2
 ; CHECK-SD-NEXT:    ret
@@ -1756,6 +1859,7 @@ define i64 @umsubl_ldrb_h_commuted(ptr %x0, i16 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umsubl_ldrb_h_commuted:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldrb w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xffff
 ; CHECK-GI-NEXT:    msub x0, x9, x8, x2
 ; CHECK-GI-NEXT:    ret
@@ -1794,6 +1898,7 @@ define i64 @umsubl_ldr_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-SD-LABEL: umsubl_ldr_b:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr w8, [x0]
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x9, x1, #0xff
 ; CHECK-SD-NEXT:    umsubl x0, w8, w9, x2
 ; CHECK-SD-NEXT:    ret
@@ -1801,6 +1906,7 @@ define i64 @umsubl_ldr_b(ptr %x0, i8 %x1, i64 %x2) {
 ; CHECK-GI-LABEL: umsubl_ldr_b:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr w8, [x0]
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    msub x0, x8, x9, x2
 ; CHECK-GI-NEXT:    ret
@@ -2013,13 +2119,22 @@ define i64 @umaddl_and_and(i64 %x, i64 %y, i64 %a) {
 
 ; Check which can contain multiple copies that should all be removed.
 define i32 @f(i32 %0) {
-; CHECK-LABEL: f:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    neg w8, w0
-; CHECK-NEXT:  .LBB93_1: // %B
-; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    cbnz x8, .LBB93_1
-; CHECK-NEXT:    b .LBB93_1
+; CHECK-SD-LABEL: f:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    neg w8, w0
+; CHECK-SD-NEXT:  .LBB93_1: // %B
+; CHECK-SD-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-SD-NEXT:    cbnz x8, .LBB93_1
+; CHECK-SD-NEXT:    b .LBB93_1
+;
+; CHECK-GI-LABEL: f:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    neg w8, w0
+; CHECK-GI-NEXT:  .LBB93_1: // %B
+; CHECK-GI-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-GI-NEXT:    cbnz x8, .LBB93_1
+; CHECK-GI-NEXT:    b .LBB93_1
 entry:
   %1 = sext i32 %0 to i64
   br label %A

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
index ef7e9f6e0fe434..e11ae9a2515900 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mulv.ll
@@ -27,6 +27,7 @@ declare i128 @llvm.vector.reduce.mul.v2i128(<2 x i128>)
 define i8 @mulv_v2i8(<2 x i8> %a) {
 ; CHECK-LABEL: mulv_v2i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mul w0, w9, w8
@@ -50,6 +51,7 @@ entry:
 define i8 @mulv_v4i8(<4 x i8> %a) {
 ; CHECK-SD-LABEL: mulv_v4i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.h[1]
 ; CHECK-SD-NEXT:    umov w9, v0.h[0]
 ; CHECK-SD-NEXT:    umov w10, v0.h[2]
@@ -61,6 +63,7 @@ define i8 @mulv_v4i8(<4 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: mulv_v4i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -77,6 +80,7 @@ entry:
 define i8 @mulv_v8i8(<8 x i8> %a) {
 ; CHECK-SD-LABEL: mulv_v8i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.b[1]
 ; CHECK-SD-NEXT:    umov w9, v0.b[0]
 ; CHECK-SD-NEXT:    umov w10, v0.b[2]
@@ -96,6 +100,7 @@ define i8 @mulv_v8i8(<8 x i8> %a) {
 ;
 ; CHECK-GI-LABEL: mulv_v8i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.b[0]
 ; CHECK-GI-NEXT:    umov w9, v0.b[1]
 ; CHECK-GI-NEXT:    umov w10, v0.b[2]
@@ -218,6 +223,7 @@ entry:
 define i16 @mulv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: mulv_v2i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mul w0, w9, w8
@@ -230,6 +236,7 @@ entry:
 define i16 @mulv_v3i16(<3 x i16> %a) {
 ; CHECK-SD-LABEL: mulv_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.h[1]
 ; CHECK-SD-NEXT:    umov w9, v0.h[0]
 ; CHECK-SD-NEXT:    umov w10, v0.h[2]
@@ -239,6 +246,7 @@ define i16 @mulv_v3i16(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: mulv_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -253,6 +261,7 @@ entry:
 define i16 @mulv_v4i16(<4 x i16> %a) {
 ; CHECK-SD-LABEL: mulv_v4i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.h[1]
 ; CHECK-SD-NEXT:    umov w9, v0.h[0]
 ; CHECK-SD-NEXT:    umov w10, v0.h[2]
@@ -264,6 +273,7 @@ define i16 @mulv_v4i16(<4 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: mulv_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -346,6 +356,7 @@ entry:
 define i32 @mulv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: mulv_v2i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mul w0, w9, w8
@@ -437,8 +448,11 @@ entry:
 define i64 @mulv_v3i64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: mulv_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x8, d2
 ; CHECK-SD-NEXT:    fmov x9, d0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mul x8, x9, x8
 ; CHECK-SD-NEXT:    fmov x9, d1
 ; CHECK-SD-NEXT:    mul x0, x9, x8

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
index 1ba16956cad450..2e165179381820 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll
@@ -210,9 +210,9 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) {
 ; CHECK-NEXT:    stp xzr, xzr, [x0, #16]
 ; CHECK-NEXT:    uaddlv.8h s0, v0
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
-; CHECK-NEXT:    ushll.4s v0, v1, #0
-; CHECK-NEXT:    ucvtf.4s v0, v0
-; CHECK-NEXT:    str q0, [x0]
+; CHECK-NEXT:    ushll.4s v1, v1, #0
+; CHECK-NEXT:    ucvtf.4s v1, v1
+; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -232,10 +232,10 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) {
 ; CHECK-NEXT:    add x8, x0, #8
 ; CHECK-NEXT:    uaddlv.8h s0, v0
 ; CHECK-NEXT:    mov.h v1[0], v0[0]
-; CHECK-NEXT:    ushll.4s v0, v1, #0
-; CHECK-NEXT:    ucvtf.4s v0, v0
-; CHECK-NEXT:    st1.s { v0 }[2], [x8]
-; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ushll.4s v1, v1, #0
+; CHECK-NEXT:    ucvtf.4s v1, v1
+; CHECK-NEXT:    st1.s { v1 }[2], [x8]
+; CHECK-NEXT:    str d1, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -278,9 +278,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) {
 ; CHECK-NEXT:    stp q0, q0, [x0, #32]
 ; CHECK-NEXT:    mov.h v2[0], v1[0]
 ; CHECK-NEXT:    bic.4h v2, #255, lsl #8
-; CHECK-NEXT:    ushll.4s v1, v2, #0
-; CHECK-NEXT:    ucvtf.4s v1, v1
-; CHECK-NEXT:    stp q1, q0, [x0]
+; CHECK-NEXT:    ushll.4s v2, v2, #0
+; CHECK-NEXT:    ucvtf.4s v2, v2
+; CHECK-NEXT:    stp q2, q0, [x0]
 ; CHECK-NEXT:    ret
 
 entry:
@@ -490,6 +490,7 @@ define void @store_saddlv_v8i8(ptr %H, <8 x i8> %sum_h, i32 %idx) {
 ; CHECK-LABEL: store_saddlv_v8i8:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    saddlv.8b h0, v0
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sbfiz x8, x1, #3, #32
 ; CHECK-NEXT:    str s0, [x0, x8]
 ; CHECK-NEXT:    ret
@@ -505,6 +506,7 @@ define void @store_saddlv_v16i8(ptr %H, <16 x i8> %sum_h, i32 %idx) {
 ; CHECK-LABEL: store_saddlv_v16i8:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    saddlv.16b h0, v0
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sbfiz x8, x1, #3, #32
 ; CHECK-NEXT:    str s0, [x0, x8]
 ; CHECK-NEXT:    ret
@@ -520,6 +522,7 @@ define void @store_saddlv_v4i16(ptr %H, <4 x i16> %sum_h, i32 %idx) {
 ; CHECK-LABEL: store_saddlv_v4i16:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    saddlv.4h s0, v0
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sbfiz x8, x1, #3, #32
 ; CHECK-NEXT:    str s0, [x0, x8]
 ; CHECK-NEXT:    ret
@@ -535,6 +538,7 @@ define void @store_saddlv_v8i16(ptr %H, <8 x i16> %sum_h, i32 %idx) {
 ; CHECK-LABEL: store_saddlv_v8i16:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    saddlv.8h s0, v0
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sbfiz x8, x1, #3, #32
 ; CHECK-NEXT:    str s0, [x0, x8]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-scalarize-vec-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-scalarize-vec-load-ext.ll
index 75d7623c4cfb2e..30ce0cb09fc084 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-scalarize-vec-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-scalarize-vec-load-ext.ll
@@ -21,6 +21,7 @@ define i32 @narrow_load_v4_i32_single_ele_variable_idx(ptr %ptr, i64 %off, i32 %
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    ldr q0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    bfi x8, x2, #2, #2
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    ldr w0, [x8]

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
index 75a96be9b435e2..7f20b5e5ee4df5 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll
@@ -24,6 +24,8 @@ declare i128 @llvm.read_volatile_register.i128(metadata) #1
 define void @test_wsr128(i128 noundef %v) #0 {
 ; CHECK-LE-LABEL: test_wsr128:
 ; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-LE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
 ; CHECK-LE-NEXT:    msrr S1_2_C3_C4_5, x0, x1
 ; CHECK-LE-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
index 0dc46b5c9eaafd..4cd7c4a034c875 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
@@ -9,6 +9,7 @@ define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    dup v0.4h, v0.h[4]
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   ; Check that we don't just dup the input vector. The code emitted is ext, dup, ext, ext

diff  --git a/llvm/test/CodeGen/AArch64/abs.ll b/llvm/test/CodeGen/AArch64/abs.ll
index fed183148d06ee..d501d9ed24547a 100644
--- a/llvm/test/CodeGen/AArch64/abs.ll
+++ b/llvm/test/CodeGen/AArch64/abs.ll
@@ -248,6 +248,7 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
 ; CHECK-GI-NEXT:    cmp w8, #0
 ; CHECK-GI-NEXT:    cneg w8, w9, le
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %res = call <1 x i32> @llvm.abs.v1i32(<1 x i32> %a, i1 0)

diff  --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
index 61de2dc4edfd16..025bbf749fc71b 100644
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -129,7 +129,9 @@ define <vscale x 2 x i1> @lane_mask_nxv2i1_i8(i8 %index, i8 %TC) {
 ; CHECK-LABEL: lane_mask_nxv2i1_i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0xff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z1.d, x8
 ; CHECK-NEXT:    and x8, x1, #0xff
@@ -342,6 +344,7 @@ define <16 x i1> @lane_mask_v16i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, w0, w1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
   ret <16 x i1> %active.lane.mask
@@ -352,6 +355,7 @@ define <8 x i1> @lane_mask_v8i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, w0, w1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
   ret <8 x i1> %active.lane.mask
@@ -362,6 +366,7 @@ define <4 x i1> @lane_mask_v4i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.h, w0, w1
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
   ret <4 x i1> %active.lane.mask
@@ -372,6 +377,7 @@ define <2 x i1> @lane_mask_v2i1_i32(i32 %index, i32 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.s, w0, w1
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %TC)
   ret <2 x i1> %active.lane.mask
@@ -382,6 +388,7 @@ define <16 x i1> @lane_mask_v16i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, x0, x1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %index, i64 %TC)
   ret <16 x i1> %active.lane.mask
@@ -392,6 +399,7 @@ define <8 x i1> @lane_mask_v8i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.b, x0, x1
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %TC)
   ret <8 x i1> %active.lane.mask
@@ -402,6 +410,7 @@ define <4 x i1> @lane_mask_v4i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.h, x0, x1
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %TC)
   ret <4 x i1> %active.lane.mask
@@ -412,6 +421,7 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo p0.s, x0, x1
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %TC)
   ret <2 x i1> %active.lane.mask

diff  --git a/llvm/test/CodeGen/AArch64/adc.ll b/llvm/test/CodeGen/AArch64/adc.ll
index d6ea5cf34529d2..4b1393ffc624aa 100644
--- a/llvm/test/CodeGen/AArch64/adc.ll
+++ b/llvm/test/CodeGen/AArch64/adc.ll
@@ -69,6 +69,7 @@ define i128 @test_shifted(i128 %a, i128 %b) {
 define i128 @test_extended(i128 %a, i16 %b) {
 ; CHECK-LE-LABEL: test_extended:
 ; CHECK-LE:       ; %bb.0:
+; CHECK-LE-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-LE-NEXT:    sxth x8, w2
 ; CHECK-LE-NEXT:    adds x0, x0, w2, sxth #3
 ; CHECK-LE-NEXT:    asr x9, x8, #63
@@ -78,6 +79,7 @@ define i128 @test_extended(i128 %a, i16 %b) {
 ;
 ; CHECK-BE-LABEL: test_extended:
 ; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-BE-NEXT:    sxth x8, w2
 ; CHECK-BE-NEXT:    adds x1, x1, w2, sxth #3
 ; CHECK-BE-NEXT:    asr x9, x8, #63

diff  --git a/llvm/test/CodeGen/AArch64/add-extract.ll b/llvm/test/CodeGen/AArch64/add-extract.ll
index 31c1120a6d0150..67c9f74ee02988 100644
--- a/llvm/test/CodeGen/AArch64/add-extract.ll
+++ b/llvm/test/CodeGen/AArch64/add-extract.ll
@@ -69,6 +69,7 @@ define i64 @add_i64_ext_ext(<1 x i64> %A, <1 x i64> %B) nounwind {
 define i32 @add_i32_ext_load(<1 x i32> %A, ptr %B) nounwind {
 ; CHECK-LABEL: add_i32_ext_load:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    ldr w8, [x0]
 ; CHECK-NEXT:    add w0, w9, w8

diff  --git a/llvm/test/CodeGen/AArch64/add.ll b/llvm/test/CodeGen/AArch64/add.ll
index dbbacf1f55a71d..e3072dc41d933c 100644
--- a/llvm/test/CodeGen/AArch64/add.ll
+++ b/llvm/test/CodeGen/AArch64/add.ll
@@ -399,14 +399,19 @@ define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    fmov x9, d5
-; CHECK-GI-NEXT:    add v0.2d, v0.2d, v3.2d
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    add x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    add v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = add <3 x i64> %d, %e

diff  --git a/llvm/test/CodeGen/AArch64/addimm-mulimm.ll b/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
index 312fd348fddce3..6636813eb25049 100644
--- a/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
+++ b/llvm/test/CodeGen/AArch64/addimm-mulimm.ll
@@ -318,6 +318,7 @@ define i32 @addmuladd_gep2(ptr %p, i32 %a) {
 ; CHECK-LABEL: addmuladd_gep2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #3240 // =0xca8
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    smaddl x8, w1, w8, x0
 ; CHECK-NEXT:    ldr w8, [x8, #3260]
 ; CHECK-NEXT:    tbnz w8, #31, .LBB22_2

diff  --git a/llvm/test/CodeGen/AArch64/addp-shuffle.ll b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
index 526943eaf88ad2..54c96820285d32 100644
--- a/llvm/test/CodeGen/AArch64/addp-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/addp-shuffle.ll
@@ -28,6 +28,7 @@ define <2 x i32> @deinterleave_shuffle_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addp v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
   %r1 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
@@ -50,6 +51,7 @@ define <4 x i16> @deinterleave_shuffle_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addp v0.8h, v0.8h, v0.8h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   %r1 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -72,6 +74,7 @@ define <8 x i8> @deinterleave_shuffle_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: deinterleave_shuffle_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    addp v0.16b, v0.16b, v0.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   %r1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>

diff  --git a/llvm/test/CodeGen/AArch64/addsub_ext.ll b/llvm/test/CodeGen/AArch64/addsub_ext.ll
index 6c3564375279e9..04a98bd5088803 100644
--- a/llvm/test/CodeGen/AArch64/addsub_ext.ll
+++ b/llvm/test/CodeGen/AArch64/addsub_ext.ll
@@ -26,6 +26,7 @@ define i32 @add_z_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @add_z_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_i8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxtb
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -36,6 +37,7 @@ define i64 @add_z_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @add_z_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_shli8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxtb #3
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -68,6 +70,7 @@ define i32 @add_s_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @add_s_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_i8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxtb
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -78,6 +81,7 @@ define i64 @add_s_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @add_s_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_shli8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxtb #3
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -110,6 +114,7 @@ define i32 @add_z_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @add_z_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_i16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxth
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -120,6 +125,7 @@ define i64 @add_z_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @add_z_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_z_shli16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, uxth #3
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -173,6 +179,7 @@ define i32 @add_s_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @add_s_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_i16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxth
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -183,6 +190,7 @@ define i64 @add_s_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @add_s_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: add_s_shli16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x0, x1, w0, sxth #3
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -236,6 +244,7 @@ define i32 @sub_z_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @sub_z_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_i8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxtb
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -246,6 +255,7 @@ define i64 @sub_z_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @sub_z_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_shli8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxtb #3
 ; CHECK-NEXT:    ret
   %vz = zext i8 %v to i64
@@ -278,6 +288,7 @@ define i32 @sub_s_shli8i32(i8 %v, i32 %lhs) minsize {
 define i64 @sub_s_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_i8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxtb
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -288,6 +299,7 @@ define i64 @sub_s_i8i64(i8 %v, i64 %lhs) minsize {
 define i64 @sub_s_shli8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_shli8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxtb #3
 ; CHECK-NEXT:    ret
   %vz = sext i8 %v to i64
@@ -320,6 +332,7 @@ define i32 @sub_z_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @sub_z_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_i16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxth
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -330,6 +343,7 @@ define i64 @sub_z_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @sub_z_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_z_shli16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, uxth #3
 ; CHECK-NEXT:    ret
   %vz = zext i16 %v to i64
@@ -383,6 +397,7 @@ define i32 @sub_s_shli16i32(i16 %v, i32 %lhs) minsize {
 define i64 @sub_s_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_i16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxth
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -393,6 +408,7 @@ define i64 @sub_s_i16i64(i16 %v, i64 %lhs) minsize {
 define i64 @sub_s_shli16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: sub_s_shli16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x0, x1, w0, sxth #3
 ; CHECK-NEXT:    ret
   %vz = sext i16 %v to i64
@@ -428,7 +444,7 @@ define i32 @cmp_s_i8i32(i8 %v, i32 %lhs) minsize {
 ; CHECK-NEXT:    cmp w1, w0, uxtb
 ; CHECK-NEXT:    b.ge .LBB40_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w0, #1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB40_2: // %end
 ; CHECK-NEXT:    mov w0, w1
@@ -445,10 +461,11 @@ end:
 define i64 @cmp_s_i8i64(i8 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: cmp_s_i8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmp x1, w0, sxtb
 ; CHECK-NEXT:    b.ge .LBB41_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w0, #1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB41_2: // %end
 ; CHECK-NEXT:    mov x0, x1
@@ -468,7 +485,7 @@ define i32 @cmp_s_i16i32(i16 %v, i32 %lhs) minsize {
 ; CHECK-NEXT:    cmp w1, w0, uxth
 ; CHECK-NEXT:    b.ge .LBB42_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w0, #1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB42_2: // %end
 ; CHECK-NEXT:    mov w0, w1
@@ -485,10 +502,11 @@ end:
 define i64 @cmp_s_i16i64(i16 %v, i64 %lhs) minsize {
 ; CHECK-LABEL: cmp_s_i16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmp x1, w0, sxth
 ; CHECK-NEXT:    b.ge .LBB43_2
 ; CHECK-NEXT:  // %bb.1: // %then
-; CHECK-NEXT:    mov w0, #1 // =0x1
+; CHECK-NEXT:    mov w0, #1
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB43_2: // %end
 ; CHECK-NEXT:    mov x0, x1

diff  --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index 5046c0571ad2bc..09f00b3845f25f 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -11,6 +11,7 @@ define void @new_position(i32 %pos) {
 ; CHECK-SD-LABEL: new_position:
 ; CHECK-SD:       ; %bb.0: ; %entry
 ; CHECK-SD-NEXT:    adrp x8, _board@GOTPAGE
+; CHECK-SD-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr x8, [x8, _board@GOTPAGEOFF]
 ; CHECK-SD-NEXT:    ldrb w8, [x8, w0, sxtw]
 ; CHECK-SD-NEXT:    sub w8, w8, #1

diff  --git a/llvm/test/CodeGen/AArch64/andorxor.ll b/llvm/test/CodeGen/AArch64/andorxor.ll
index e99a2754ef0857..5c7429aebb31e9 100644
--- a/llvm/test/CodeGen/AArch64/andorxor.ll
+++ b/llvm/test/CodeGen/AArch64/andorxor.ll
@@ -1200,14 +1200,19 @@ define <3 x i64> @and_v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: and_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    fmov x9, d5
-; CHECK-GI-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    and v0.16b, v0.16b, v3.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = and <3 x i64> %d, %e
@@ -1224,14 +1229,19 @@ define <3 x i64> @or_v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: or_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    fmov x9, d5
-; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    orr x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    orr v0.16b, v0.16b, v3.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = or <3 x i64> %d, %e
@@ -1248,14 +1258,19 @@ define <3 x i64> @xor_v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: xor_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    fmov x9, d5
-; CHECK-GI-NEXT:    eor v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    eor x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    eor v0.16b, v0.16b, v3.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = xor <3 x i64> %d, %e

diff  --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
index 672002a03d58dc..1b22514a59d608 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -173,6 +173,7 @@ define void @bar(i32 %x, <4 x i32> %y) nounwind {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #80
 ; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    str w0, [sp, #60]
 ; CHECK-NEXT:    stp q0, q0, [sp, #16]
 ; CHECK-NEXT:    str x0, [sp]
@@ -240,6 +241,7 @@ define void @bar2(i32 %x, i128 %s41.coerce) nounwind {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    sub sp, sp, #80
 ; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    str w0, [sp, #60]
 ; CHECK-NEXT:    stp x1, x2, [sp, #32]
 ; CHECK-NEXT:    stp x1, x2, [sp, #16]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-addp.ll b/llvm/test/CodeGen/AArch64/arm64-addp.ll
index 28a90271dbc368..47ffba9a509ac9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addp.ll
@@ -27,6 +27,7 @@ define i64 @foo0(<2 x i64> %a) nounwind {
 define float @foo1(<2 x float> %a) nounwind {
 ; CHECK-LABEL: foo1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp.2s s0, v0
 ; CHECK-NEXT:    ret
   %lane0.i = extractelement <2 x float> %a, i32 0

diff  --git a/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
index f8df92cd7e74d8..65cc368c0b5617 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@@ -11,6 +11,8 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) {
 ; CHECK-LABEL: fullGtU:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    adrp x9, _block@GOTPAGE
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    sxtw x10, w1
 ; CHECK-NEXT:    ldr x9, [x9, _block@GOTPAGEOFF]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
index 4a84c673af8cfc..37c61d0a4a0fb6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -43,6 +43,10 @@ define i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) {
 ;
 ; LSE-LABEL: val_compare_and_swap:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
 ; LSE-NEXT:    caspa x2, x3, x4, x5, [x0]
 ; LSE-NEXT:    mov x0, x2
 ; LSE-NEXT:    mov x1, x3
@@ -90,6 +94,10 @@ define i128 @val_compare_and_swap_seqcst(ptr %p, i128 %oldval, i128 %newval) {
 ;
 ; LSE-LABEL: val_compare_and_swap_seqcst:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
 ; LSE-NEXT:    caspal x2, x3, x4, x5, [x0]
 ; LSE-NEXT:    mov x0, x2
 ; LSE-NEXT:    mov x1, x3
@@ -137,6 +145,10 @@ define i128 @val_compare_and_swap_release(ptr %p, i128 %oldval, i128 %newval) {
 ;
 ; LSE-LABEL: val_compare_and_swap_release:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
 ; LSE-NEXT:    caspl x2, x3, x4, x5, [x0]
 ; LSE-NEXT:    mov x0, x2
 ; LSE-NEXT:    mov x1, x3
@@ -184,6 +196,10 @@ define i128 @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval)
 ;
 ; LSE-LABEL: val_compare_and_swap_monotonic:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
 ; LSE-NEXT:    casp x2, x3, x4, x5, [x0]
 ; LSE-NEXT:    mov x0, x2
 ; LSE-NEXT:    mov x1, x3
@@ -235,7 +251,7 @@ define void @fetch_and_nand(ptr %p, i128 %bits) {
 ; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
-; LSE-NEXT:    and x8, x5, x3
+; LSE-NEXT:    and x8, x7, x3
 ; LSE-NEXT:    and x9, x4, x2
 ; LSE-NEXT:    mvn x10, x9
 ; LSE-NEXT:    mvn x11, x8
@@ -295,7 +311,7 @@ define void @fetch_and_or(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    orr x8, x4, x2
-; LSE-NEXT:    orr x9, x5, x3
+; LSE-NEXT:    orr x9, x7, x3
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
 ; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
@@ -352,7 +368,7 @@ define void @fetch_and_add(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    adds x8, x4, x2
-; LSE-NEXT:    adc x9, x5, x3
+; LSE-NEXT:    adc x9, x7, x3
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
 ; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
@@ -408,7 +424,7 @@ define void @fetch_and_sub(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    subs x8, x4, x2
-; LSE-NEXT:    sbc x9, x5, x3
+; LSE-NEXT:    sbc x9, x7, x3
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
 ; LSE-NEXT:    caspal x4, x5, x8, x9, [x0]
@@ -468,8 +484,8 @@ define void @fetch_and_min(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    cmp x2, x4
-; LSE-NEXT:    sbcs xzr, x3, x5
-; LSE-NEXT:    csel x9, x5, x3, ge
+; LSE-NEXT:    sbcs xzr, x3, x7
+; LSE-NEXT:    csel x9, x7, x3, ge
 ; LSE-NEXT:    csel x8, x4, x2, ge
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
@@ -530,8 +546,8 @@ define void @fetch_and_max(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    cmp x2, x4
-; LSE-NEXT:    sbcs xzr, x3, x5
-; LSE-NEXT:    csel x9, x5, x3, lt
+; LSE-NEXT:    sbcs xzr, x3, x7
+; LSE-NEXT:    csel x9, x7, x3, lt
 ; LSE-NEXT:    csel x8, x4, x2, lt
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
@@ -592,8 +608,8 @@ define void @fetch_and_umin(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    cmp x2, x4
-; LSE-NEXT:    sbcs xzr, x3, x5
-; LSE-NEXT:    csel x9, x5, x3, hs
+; LSE-NEXT:    sbcs xzr, x3, x7
+; LSE-NEXT:    csel x9, x7, x3, hs
 ; LSE-NEXT:    csel x8, x4, x2, hs
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
@@ -654,8 +670,8 @@ define void @fetch_and_umax(ptr %p, i128 %bits) {
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
 ; LSE-NEXT:    cmp x2, x4
-; LSE-NEXT:    sbcs xzr, x3, x5
-; LSE-NEXT:    csel x9, x5, x3, lo
+; LSE-NEXT:    sbcs xzr, x3, x7
+; LSE-NEXT:    csel x9, x7, x3, lo
 ; LSE-NEXT:    csel x8, x4, x2, lo
 ; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
@@ -698,8 +714,8 @@ define i128 @atomic_load_seq_cst(ptr %p) {
 ;
 ; LSE-LABEL: atomic_load_seq_cst:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    mov x2, #0 // =0x0
-; LSE-NEXT:    mov x3, #0 // =0x0
+; LSE-NEXT:    mov x2, #0
+; LSE-NEXT:    mov x3, #0
 ; LSE-NEXT:    caspal x2, x3, x2, x3, [x0]
 ; LSE-NEXT:    mov x0, x2
 ; LSE-NEXT:    mov x1, x3
@@ -731,8 +747,8 @@ define i128 @atomic_load_relaxed(i64, i64, ptr %p) {
 ;
 ; LSE-LABEL: atomic_load_relaxed:
 ; LSE:       // %bb.0:
-; LSE-NEXT:    mov x0, #0 // =0x0
-; LSE-NEXT:    mov x1, #0 // =0x0
+; LSE-NEXT:    mov x0, #0
+; LSE-NEXT:    mov x1, #0
 ; LSE-NEXT:    casp x0, x1, x0, x1, [x2]
 ; LSE-NEXT:    ret
     %r = load atomic i128, ptr %p monotonic, align 16
@@ -763,7 +779,9 @@ define void @atomic_store_seq_cst(i128 %in, ptr %p) {
 ;
 ; LSE-LABEL: atomic_store_seq_cst:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:    ldp x4, x5, [x2]
+; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:  .LBB14_1: // %atomicrmw.start
 ; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; LSE-NEXT:    mov x6, x4
@@ -803,7 +821,9 @@ define void @atomic_store_release(i128 %in, ptr %p) {
 ;
 ; LSE-LABEL: atomic_store_release:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:    ldp x4, x5, [x2]
+; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:  .LBB15_1: // %atomicrmw.start
 ; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; LSE-NEXT:    mov x6, x4
@@ -843,7 +863,9 @@ define void @atomic_store_relaxed(i128 %in, ptr %p) {
 ;
 ; LSE-LABEL: atomic_store_relaxed:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:    ldp x4, x5, [x2]
+; LSE-NEXT:    // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
 ; LSE-NEXT:  .LBB16_1: // %atomicrmw.start
 ; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; LSE-NEXT:    mov x6, x4
@@ -899,6 +921,10 @@ define void @cmpxchg_dead(ptr %ptr, i128 %desired, i128 %new) {
 ;
 ; LSE-LABEL: cmpxchg_dead:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
 ; LSE-NEXT:    casp x2, x3, x4, x5, [x0]
 ; LSE-NEXT:    ret
   cmpxchg ptr %ptr, i128 %desired, i128 %new monotonic monotonic

diff  --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index fb17bdac1fe78d..bdbff0563a22b9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -771,6 +771,7 @@ entry:
 define i64 @fct18(i32 %xor72) nounwind ssp {
 ; LLC-LABEL: fct18:
 ; LLC:       // %bb.0:
+; LLC-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; LLC-NEXT:    ubfx x0, x0, #9, #8
 ; LLC-NEXT:    ret
 ; OPT-LABEL: @fct18(

diff  --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
index 3dd384ab2aa9ab..82802c79c70858 100644
--- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll
@@ -6,6 +6,10 @@
 define <4 x float>  @foo(float %a, float %b, float %c, float %d) nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-NEXT:    mov v0.s[3], v3.s[0]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
index 2283cd9e771651..1b9db8b70a1fb4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
@@ -612,6 +612,7 @@ define <1 x i8> @getL() {
 ; CHECK-LABEL: _setL
 ; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]:
 ; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE
+; CHECK-NEXT: ; kill
 ; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]:
 ; CHECK-NEXT: ldr {{[xw]}}[[LDRGOT_REG:[0-9]+]], [[[ADRP_REG]], _L@GOTPAGEOFF]
 ; Ultimately we should generate str b0, but right now, we match the vector

diff  --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 66356a45b2e514..4c28ea75922024 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -43,6 +43,7 @@ define <2 x i32> @v_dup32(i32 %A) nounwind {
 define <2 x float> @v_dupfloat(float %A) nounwind {
 ; CHECK-LABEL: v_dupfloat:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.2s v0, v0[0]
 ; CHECK-NEXT:    ret
   %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
@@ -118,6 +119,7 @@ define <4 x i16> @v_dup16_const(i16 %y, ptr %p) {
 define <4 x float> @v_dupQfloat(float %A) nounwind {
 ; CHECK-LABEL: v_dupQfloat:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[0]
 ; CHECK-NEXT:    ret
   %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
@@ -162,6 +164,7 @@ define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
 define <2 x float> @v_shuffledupfloat(float %A) nounwind {
 ; CHECK-LABEL: v_shuffledupfloat:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.2s v0, v0[0]
 ; CHECK-NEXT:    ret
   %tmp1 = insertelement <2 x float> undef, float %A, i32 0
@@ -202,6 +205,7 @@ define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
 define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
 ; CHECK-LABEL: v_shuffledupQfloat:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[0]
 ; CHECK-NEXT:    ret
   %tmp1 = insertelement <4 x float> undef, float %A, i32 0
@@ -212,6 +216,7 @@ define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
 define <8 x i8> @vduplane8(<8 x i8> %A) nounwind {
 ; CHECK-LABEL: vduplane8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.8b v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -221,6 +226,7 @@ define <8 x i8> @vduplane8(<8 x i8> %A) nounwind {
 define <4 x i16> @vduplane16(<4 x i16> %A) nounwind {
 ; CHECK-LABEL: vduplane16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.4h v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
@@ -230,6 +236,7 @@ define <4 x i16> @vduplane16(<4 x i16> %A) nounwind {
 define <2 x i32> @vduplane32(<2 x i32> %A) nounwind {
 ; CHECK-LABEL: vduplane32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.2s v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
@@ -239,6 +246,7 @@ define <2 x i32> @vduplane32(<2 x i32> %A) nounwind {
 define <2 x float> @vduplanefloat(<2 x float> %A) nounwind {
 ; CHECK-LABEL: vduplanefloat:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.2s v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
@@ -248,6 +256,7 @@ define <2 x float> @vduplanefloat(<2 x float> %A) nounwind {
 define <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
 ; CHECK-LABEL: vduplaneQ8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.16b v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <8 x i8> %A, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -257,6 +266,7 @@ define <16 x i8> @vduplaneQ8(<8 x i8> %A) nounwind {
 define <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
 ; CHECK-LABEL: vduplaneQ16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.8h v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <4 x i16> %A, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
@@ -266,6 +276,7 @@ define <8 x i16> @vduplaneQ16(<4 x i16> %A) nounwind {
 define <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
 ; CHECK-LABEL: vduplaneQ32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <2 x i32> %A, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
@@ -275,6 +286,7 @@ define <4 x i32> @vduplaneQ32(<2 x i32> %A) nounwind {
 define <4 x float> @vduplaneQfloat(<2 x float> %A) nounwind {
 ; CHECK-LABEL: vduplaneQfloat:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[1]
 ; CHECK-NEXT:    ret
   %tmp2 = shufflevector <2 x float> %A, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
@@ -326,12 +338,14 @@ define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone  {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov s0, w0
 ; CHECK-SD-NEXT:    mov.s v0[1], w1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: f:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov.s v0[0], w0
 ; CHECK-GI-NEXT:    mov.s v0[1], w1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %vecinit = insertelement <2 x i32> undef, i32 %a, i32 0
   %vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1
@@ -395,6 +409,7 @@ define <4 x i16> @test_build_illegal(<4 x i32> %in) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov.s w8, v0[3]
 ; CHECK-GI-NEXT:    mov.h v0[3], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %val = extractelement <4 x i32> %in, i32 3
   %smallval = trunc i32 %val to i16
@@ -430,15 +445,20 @@ define <4 x i16> @test_perfectshuffle_dupext_v4i16(<4 x i16> %a, <4 x i16> %b) n
 ; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    trn1.4h v0, v0, v0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov.s v0[1], v1[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    adrp x8, .LCPI34_0
+; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI34_0]
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
   ret <4 x i16> %r
@@ -448,15 +468,20 @@ define <4 x half> @test_perfectshuffle_dupext_v4f16(<4 x half> %a, <4 x half> %b
 ; CHECK-SD-LABEL: test_perfectshuffle_dupext_v4f16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    trn1.4h v0, v0, v0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov.s v0[1], v1[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
+; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI35_0]
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %r = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
   ret <4 x half> %r
@@ -472,7 +497,9 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n
 ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
 ; CHECK-GI-NEXT:    ret
   %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
@@ -489,7 +516,9 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float>
 ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI37_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI37_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
 ; CHECK-GI-NEXT:    ret
   %r = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 4, i32 5>
@@ -509,13 +538,14 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) {
 ; CHECK-GI-LABEL: disguised_dup:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI38_1
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI38_1]
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI38_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
-; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v1
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI38_0]
-; CHECK-GI-NEXT:    tbl.16b v1, { v0, v1 }, v1
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
+; CHECK-GI-NEXT:    tbl.16b v2, { v0, v1 }, v2
 ; CHECK-GI-NEXT:    str q0, [x0]
-; CHECK-GI-NEXT:    str q1, [x1]
+; CHECK-GI-NEXT:    str q2, [x1]
 ; CHECK-GI-NEXT:    ret
   %shuf = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 0>
   %dup = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
@@ -557,6 +587,7 @@ define <2 x i32> @dup_const4_ext(<4 x i32> %A) nounwind {
 ; CHECK-GI-NEXT:    adrp x8, .LCPI40_0
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI40_0]
 ; CHECK-GI-NEXT:    add.4s v0, v0, v1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = add <4 x i32> %A, <i32 8421377, i32 8421377, i32 8421377, i32 8421377>
   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
@@ -567,6 +598,7 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
 ; CHECK-SD-LABEL: dup_const24:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    movk w8, #128, lsl #16
 ; CHECK-SD-NEXT:    dup.4s v3, w8
 ; CHECK-SD-NEXT:    add.2s v0, v0, v3
@@ -578,6 +610,7 @@ define <4 x i32> @dup_const24(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C) nounwind
 ; CHECK-GI-LABEL: dup_const24:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI41_1
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    ldr d3, [x8, :lo12:.LCPI41_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI41_0
 ; CHECK-GI-NEXT:    add.2s v0, v0, v3
@@ -623,6 +656,7 @@ define <8 x i16> @bitcast_i64_v8i16_lane1(i64 %a) {
 define <8 x i16> @bitcast_f64_v8i16(double %a) {
 ; CHECK-LABEL: bitcast_f64_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup.8h v0, v0[0]
 ; CHECK-NEXT:    ret
   %b = bitcast double %a to <4 x i16>

diff  --git a/llvm/test/CodeGen/AArch64/arm64-ext.ll b/llvm/test/CodeGen/AArch64/arm64-ext.ll
index 6a6901790e5c35..50df6a0388587b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ext.ll
@@ -108,8 +108,11 @@ define <4 x i16> @test_undef(<8 x i16> %tmp1, <8 x i16> %tmp2) {
 ; CHECK-GI-LABEL: test_undef:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI10_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
   ret <4 x i16> %tmp3

diff  --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
index 987e09fc19dd10..7a4cdd52db904a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
@@ -8,6 +8,8 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    add x8, sp, #8
+; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SDAG-NEXT:    str d0, [sp, #8]
 ; CHECK-SDAG-NEXT:    umov w9, v0.b[1]
 ; CHECK-SDAG-NEXT:    bfxil x8, x0, #0, #3
@@ -26,6 +28,7 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
 ; CHECK-GISEL-NEXT:    sub sp, sp, #16
 ; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-GISEL-NEXT:    mov w9, w0
+; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    mov b1, v0.b[1]
 ; CHECK-GISEL-NEXT:    add x8, sp, #8
 ; CHECK-GISEL-NEXT:    and x9, x9, #0x7
@@ -62,6 +65,7 @@ define <8 x i8> @test_varidx_extract_v16s8(<16 x i8> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    mov x8, sp
+; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str q0, [sp]
 ; CHECK-SDAG-NEXT:    bfxil x8, x0, #0, #4
 ; CHECK-SDAG-NEXT:    ldr b1, [x8]
@@ -130,6 +134,7 @@ define i16 @test_varidx_extract_v2s16(<2 x i16> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    add x8, sp, #8
+; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str d0, [sp, #8]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #1
 ; CHECK-SDAG-NEXT:    ldr w0, [x8]
@@ -140,6 +145,7 @@ define i16 @test_varidx_extract_v2s16(<2 x i16> %x, i32 %idx) {
 ; CHECK-GISEL:       // %bb.0:
 ; CHECK-GISEL-NEXT:    sub sp, sp, #16
 ; CHECK-GISEL-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    mov s1, v0.s[1]
 ; CHECK-GISEL-NEXT:    mov w9, w0
 ; CHECK-GISEL-NEXT:    add x8, sp, #12
@@ -159,11 +165,14 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    add x8, sp, #8
+; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SDAG-NEXT:    str d0, [sp, #8]
 ; CHECK-SDAG-NEXT:    umov w9, v0.h[1]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-SDAG-NEXT:    ld1 { v0.h }[0], [x8]
 ; CHECK-SDAG-NEXT:    mov v0.s[1], w9
+; CHECK-SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SDAG-NEXT:    add sp, sp, #16
 ; CHECK-SDAG-NEXT:    ret
 ;
@@ -175,11 +184,13 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
 ; CHECK-GISEL-NEXT:    mov w8, #2 // =0x2
 ; CHECK-GISEL-NEXT:    add x10, sp, #8
 ; CHECK-GISEL-NEXT:    and x9, x9, #0x3
+; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GISEL-NEXT:    str d0, [sp, #8]
 ; CHECK-GISEL-NEXT:    madd x8, x9, x8, x10
 ; CHECK-GISEL-NEXT:    umov w9, v0.h[1]
 ; CHECK-GISEL-NEXT:    ld1 { v0.h }[0], [x8]
 ; CHECK-GISEL-NEXT:    mov v0.s[1], w9
+; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    add sp, sp, #16
 ; CHECK-GISEL-NEXT:    ret
   %tmp = extractelement <4 x i16> %x, i32 %idx
@@ -195,6 +206,7 @@ define <4 x i16> @test_varidx_extract_v8s16(<8 x i16> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    mov x8, sp
+; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str q0, [sp]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-SDAG-NEXT:    ldr h1, [x8]
@@ -237,6 +249,7 @@ define i32 @test_varidx_extract_v2s32(<2 x i32> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    add x8, sp, #8
+; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str d0, [sp, #8]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #1
 ; CHECK-SDAG-NEXT:    ldr w0, [x8]
@@ -264,6 +277,7 @@ define <2 x i32> @test_varidx_extract_v4s32(<4 x i32> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    mov x8, sp
+; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str q0, [sp]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SDAG-NEXT:    ldr s1, [x8]
@@ -298,6 +312,7 @@ define i64 @test_varidx_extract_v2s64(<2 x i64> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    mov x8, sp
+; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str q0, [sp]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SDAG-NEXT:    ldr x0, [x8]
@@ -325,6 +340,7 @@ define ptr @test_varidx_extract_v2p0(<2 x ptr> %x, i32 %idx) {
 ; CHECK-SDAG-NEXT:    sub sp, sp, #16
 ; CHECK-SDAG-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SDAG-NEXT:    mov x8, sp
+; CHECK-SDAG-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SDAG-NEXT:    str q0, [sp]
 ; CHECK-SDAG-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SDAG-NEXT:    ldr x0, [x8]

diff  --git a/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll b/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll
index df6caad99066f6..f08ff2ccf693d9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extract_subvector.ll
@@ -8,6 +8,7 @@ define <8 x i8> @v8i8(<16 x i8> %a) nounwind {
 ; CHECK-SD-LABEL: v8i8:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v8i8:
@@ -22,6 +23,7 @@ define <4 x i16> @v4i16(<8 x i16> %a) nounwind {
 ; CHECK-SD-LABEL: v4i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v4i16:
@@ -36,6 +38,7 @@ define <2 x i32> @v2i32(<4 x i32> %a) nounwind {
 ; CHECK-SD-LABEL: v2i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v2i32:
@@ -50,6 +53,7 @@ define <1 x i64> @v1i64(<2 x i64> %a) nounwind {
 ; CHECK-SD-LABEL: v1i64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v1i64:
@@ -64,6 +68,7 @@ define <1 x ptr> @v1p0(<2 x ptr> %a) nounwind {
 ; CHECK-SD-LABEL: v1p0:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v1p0:
@@ -78,6 +83,7 @@ define <2 x float> @v2f32(<4 x float> %a) nounwind {
 ; CHECK-SD-LABEL: v2f32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v2f32:
@@ -92,6 +98,7 @@ define <1 x double> @v1f64(<2 x double> %a) nounwind {
 ; CHECK-SD-LABEL: v1f64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext.16b v0, v0, v0, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v1f64:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll
index 74d117852fd7cd..d5324e4274725b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll
@@ -7,7 +7,10 @@ define float @test1(float %x, float %y) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    mvni.4s v2, #128, lsl #24
+; CHECK-NEXT:    ; kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    ; kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    bif.16b v0, v1, v2
+; CHECK-NEXT:    ; kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call float @copysignf(float %x, float %y) nounwind readnone
@@ -18,8 +21,11 @@ define double @test2(double %x, double %y) nounwind {
 ; CHECK-LABEL: test2:
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fneg.2d v2, v2
 ; CHECK-NEXT:    bif.16b v0, v1, v2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call double @copysign(double %x, double %y) nounwind readnone
@@ -32,9 +38,11 @@ define double @test3(double %a, float %b, float %c) nounwind {
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    movi.2d v3, #0xffffffffffffffff
 ; CHECK-NEXT:    fadd s1, s1, s2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fneg.2d v2, v3
 ; CHECK-NEXT:    fcvt d1, s1
 ; CHECK-NEXT:    bif.16b v0, v1, v2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp1 = fadd float %b, %c
   %tmp2 = fpext float %tmp1 to double

diff  --git a/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll b/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
index 00e30a5c9f1718..b580c4921fb66a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fixed-point-scalar-cvt-dagcombine.ll
@@ -25,6 +25,7 @@ define double @bar(ptr %iVals, ptr %fVals, ptr %dVals) {
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    sri d0, d0, #1
 ; CHECK-NEXT:    scvtf.2d v0, v0, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %arrayidx = getelementptr inbounds double, ptr %dVals, i64 16

diff  --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
index 8ae1e0768db3db..d7d54a6e48a92d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
@@ -71,6 +71,7 @@ define i64 @test_integer(i64  %in) {
 define float @test_f16(half %in) {
 ; CHECK-LABEL: test_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fcvt s1, h0
 ; CHECK-NEXT:    movi d2, #0000000000000000
 ; CHECK-NEXT:    fcmp s1, #0.0

diff  --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index a3d48268957cc8..7eb26096ed1566 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -602,6 +602,7 @@ define <2 x i32> @vec_fptosi_32(<2 x fp128> %val) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -620,6 +621,7 @@ define <2 x i32> @vec_fptosi_32(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
   %val32 = fptosi <2 x fp128> %val to <2 x i32>
@@ -684,6 +686,7 @@ define <2 x i32> @vec_fptoui_32(<2 x fp128> %val) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -702,6 +705,7 @@ define <2 x i32> @vec_fptoui_32(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
   %val32 = fptoui <2 x fp128> %val to <2 x i32>
@@ -757,6 +761,7 @@ define <2 x fp128> @vec_sitofp_32(<2 x i32> %src32) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w0, s0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatsitf
@@ -778,6 +783,7 @@ define <2 x fp128> @vec_sitofp_32(<2 x i32> %src32) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    bl __floatsitf
@@ -845,6 +851,7 @@ define <2 x fp128> @vec_uitofp_32(<2 x i32> %src32) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w0, s0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatunsitf
@@ -866,6 +873,7 @@ define <2 x fp128> @vec_uitofp_32(<2 x i32> %src32) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    bl __floatunsitf
@@ -971,9 +979,10 @@ define <2 x i1> @vec_setcc1(<2 x fp128> %lhs, <2 x fp128> %rhs) {
 ; CHECK-GI-NEXT:    bl __letf2
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cset w8, le
+; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #48
 ; CHECK-GI-NEXT:    ret
   %val = fcmp ole <2 x fp128> %lhs, %rhs
@@ -1025,9 +1034,10 @@ define <2 x i1> @vec_setcc2(<2 x fp128> %lhs, <2 x fp128> %rhs) {
 ; CHECK-GI-NEXT:    bl __letf2
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    cmp w0, #0
-; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    cset w8, gt
+; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #48
 ; CHECK-GI-NEXT:    ret
   %val = fcmp ugt <2 x fp128> %lhs, %rhs
@@ -1106,6 +1116,7 @@ define <2 x i1> @vec_setcc3(<2 x fp128> %lhs, <2 x fp128> %rhs) {
 ; CHECK-GI-NEXT:    orr w8, w20, w8
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #96
 ; CHECK-GI-NEXT:    ret
   %val = fcmp ueq <2 x fp128> %lhs, %rhs
@@ -1115,6 +1126,7 @@ define <2 x i1> @vec_setcc3(<2 x fp128> %lhs, <2 x fp128> %rhs) {
 define <2 x fp128> @vec_select(<2 x i1> %cond, <2 x fp128> %lhs, <2 x fp128> %rhs) {
 ; CHECK-SD-LABEL: vec_select:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w8, s0
 ; CHECK-SD-NEXT:    tst w8, #0x1
 ; CHECK-SD-NEXT:    b.eq .LBB40_2
@@ -1133,6 +1145,7 @@ define <2 x fp128> @vec_select(<2 x i1> %cond, <2 x fp128> %lhs, <2 x fp128> %rh
 ;
 ; CHECK-GI-LABEL: vec_select:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w9, s0
 ; CHECK-GI-NEXT:    mov d5, v1.d[1]
@@ -1170,12 +1183,15 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfhf2
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfhf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1190,9 +1206,11 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    mov v2.d[1], x8
 ; CHECK-GI-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfhf2
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfhf2
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfhf2
@@ -1201,6 +1219,7 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #64
 ; CHECK-GI-NEXT:    ret
   %dst = fptrunc <2 x fp128> %val to <2 x half>
@@ -1217,12 +1236,15 @@ define <2 x float> @vec_round_f32(<2 x fp128> %val) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfsf2
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfsf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1234,10 +1256,12 @@ define <2 x float> @vec_round_f32(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfsf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfsf2
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-GI-NEXT:    fmov d0, d1
@@ -1257,10 +1281,12 @@ define <2 x double> @vec_round_f64(<2 x fp128> %val) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfdf2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfdf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -1274,10 +1300,12 @@ define <2 x double> @vec_round_f64(<2 x fp128> %val) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfdf2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfdf2
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v1.16b
@@ -1294,7 +1322,9 @@ define <2 x fp128> @vec_extend_f16(<2 x half> %val) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __extendhftf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -1315,7 +1345,9 @@ define <2 x fp128> @vec_extend_f16(<2 x half> %val) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-GI-NEXT:    bl __extendhftf2
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
@@ -1337,7 +1369,9 @@ define <2 x fp128> @vec_extend_f32(<2 x float> %val) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __extendsftf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -1358,7 +1392,9 @@ define <2 x fp128> @vec_extend_f32(<2 x float> %val) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __extendsftf2
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
@@ -1381,6 +1417,7 @@ define <2 x fp128> @vec_extend_f64(<2 x double> %val) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __extenddftf2
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1401,6 +1438,7 @@ define <2 x fp128> @vec_extend_f64(<2 x double> %val) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __extenddftf2
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8

diff  --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 9b799da165fc3e..0412aef7545e9d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -821,11 +821,13 @@ define ptr @test_v2f64_post_reg_st1_lane(<2 x double> %in, ptr %addr) {
 define ptr @test_v8i8_post_imm_st1_lane(<8 x i8> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st1_lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], #1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st1_lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b0, v0[3]
 ; CHECK-GI-NEXT:    str b0, [x0], #1
 ; CHECK-GI-NEXT:    ret
@@ -840,11 +842,13 @@ define ptr @test_v8i8_post_reg_st1_lane(<8 x i8> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st1_lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #2 ; =0x2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.b { v0 }[3], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st1_lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b0, v0[3]
 ; CHECK-GI-NEXT:    str b0, [x0], #2
 ; CHECK-GI-NEXT:    ret
@@ -858,11 +862,13 @@ define ptr @test_v8i8_post_reg_st1_lane(<8 x i8> %in, ptr %addr) {
 define ptr @test_v4i16_post_imm_st1_lane(<4 x i16> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st1_lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], #2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st1_lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h0, v0[3]
 ; CHECK-GI-NEXT:    str h0, [x0], #2
 ; CHECK-GI-NEXT:    ret
@@ -877,11 +883,13 @@ define ptr @test_v4i16_post_reg_st1_lane(<4 x i16> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st1_lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #4 ; =0x4
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.h { v0 }[3], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st1_lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h0, v0[3]
 ; CHECK-GI-NEXT:    str h0, [x0], #4
 ; CHECK-GI-NEXT:    ret
@@ -895,11 +903,13 @@ define ptr @test_v4i16_post_reg_st1_lane(<4 x i16> %in, ptr %addr) {
 define ptr @test_v2i32_post_imm_st1_lane(<2 x i32> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st1_lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st1_lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0[1]
 ; CHECK-GI-NEXT:    str s0, [x0], #4
 ; CHECK-GI-NEXT:    ret
@@ -914,11 +924,13 @@ define ptr @test_v2i32_post_reg_st1_lane(<2 x i32> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st1_lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st1_lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0[1]
 ; CHECK-GI-NEXT:    str s0, [x0], #8
 ; CHECK-GI-NEXT:    ret
@@ -932,11 +944,13 @@ define ptr @test_v2i32_post_reg_st1_lane(<2 x i32> %in, ptr %addr) {
 define ptr @test_v2f32_post_imm_st1_lane(<2 x float> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st1_lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st1_lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0[1]
 ; CHECK-GI-NEXT:    str s0, [x0], #4
 ; CHECK-GI-NEXT:    ret
@@ -951,11 +965,13 @@ define ptr @test_v2f32_post_reg_st1_lane(<2 x float> %in, ptr %addr) {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st1_lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #8 ; =0x8
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    st1.s { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st1_lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0[1]
 ; CHECK-GI-NEXT:    str s0, [x0], #8
 ; CHECK-GI-NEXT:    ret
@@ -5474,14 +5490,18 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], #2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -5493,14 +5513,18 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -5515,14 +5539,18 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>,
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], #2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -5534,14 +5562,18 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x
 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.b { v0, v1 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.b { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -5556,14 +5588,18 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8>, <8 x
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #4
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -5575,15 +5611,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -5598,14 +5638,18 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>,
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #4
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -5617,15 +5661,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.h { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.h { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -5640,14 +5688,18 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16>,
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -5659,15 +5711,19 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -5682,14 +5738,18 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>,
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -5701,15 +5761,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -5724,14 +5788,18 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32>,
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -5743,15 +5811,19 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -5766,14 +5838,18 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>,
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -5785,15 +5861,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <
 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -5808,14 +5888,18 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64>,
 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -5827,15 +5911,19 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %pt
 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -5850,14 +5938,18 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x fl
 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -5869,15 +5961,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %pt
 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.s { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -5892,14 +5988,18 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x fl
 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -5911,15 +6011,19 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %
 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -5934,14 +6038,18 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x
 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -5953,15 +6061,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %
 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_ld2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ld2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_ld2lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.d { v0, v1 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -5976,14 +6088,20 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], #3
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -5995,14 +6113,20 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A,
 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -6017,14 +6141,20 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], #3
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -6036,14 +6166,20 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr
 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -6058,14 +6194,20 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], #6
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #6
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -6077,15 +6219,21 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A,
 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -6100,14 +6248,20 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], #6
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #6
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -6119,15 +6273,21 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A,
 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -6142,14 +6302,20 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #12
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -6161,15 +6327,21 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A,
 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -6184,14 +6356,20 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #12
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -6203,15 +6381,21 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A,
 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -6226,14 +6410,20 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -6245,15 +6435,21 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A,
 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -6268,14 +6464,20 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -6287,15 +6489,21 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A,
 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -6310,14 +6518,20 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #12
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -6329,15 +6543,21 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(pt
 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -6352,14 +6572,20 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #12
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -6371,15 +6597,21 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(pt
 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -6394,14 +6626,20 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -6413,15 +6651,21 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane
 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -6436,14 +6680,20 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -6455,15 +6705,21 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane
 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_ld3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_ld3lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -6478,14 +6734,22 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #4
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -6497,14 +6761,22 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la
 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -6519,14 +6791,22 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lan
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #4
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -6538,14 +6818,22 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(pt
 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -6560,14 +6848,22 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -6579,15 +6875,23 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la
 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -6602,14 +6906,22 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lan
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], #8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -6621,15 +6933,23 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la
 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -6644,14 +6964,22 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lan
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -6663,15 +6991,23 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la
 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -6686,14 +7022,22 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lan
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -6705,15 +7049,23 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la
 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -6728,14 +7080,22 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lan
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -6747,15 +7107,23 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la
 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -6770,14 +7138,22 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lan
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -6789,15 +7165,23 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la
 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -6812,14 +7196,22 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lan
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -6831,15 +7223,23 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i
 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -6854,14 +7254,22 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -6873,15 +7281,23 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i
 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -6896,14 +7312,22 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -6915,15 +7339,23 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po
 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -6938,14 +7370,22 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -6957,15 +7397,23 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po
 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_ld4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_ld4lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[0], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -6980,13 +7428,17 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64
 define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.16b { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.16b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -6997,13 +7449,17 @@ define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C
 define ptr @test_v16i8_post_reg_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.16b { v0, v1 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.16b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -7017,13 +7473,17 @@ declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr)
 define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.8b { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.8b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -7034,13 +7494,17 @@ define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) n
 define ptr @test_v8i8_post_reg_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.8b { v0, v1 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.8b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -7054,13 +7518,17 @@ declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)
 define ptr @test_v8i16_post_imm_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.8h { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.8h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -7072,6 +7540,8 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.8h { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7079,6 +7549,8 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.8h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -7092,13 +7564,17 @@ declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)
 define ptr @test_v4i16_post_imm_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.4h { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.4h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -7110,6 +7586,8 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.4h { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7117,6 +7595,8 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.4h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -7130,13 +7610,17 @@ declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)
 define ptr @test_v4i32_post_imm_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -7148,6 +7632,8 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7155,6 +7641,8 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -7168,13 +7656,17 @@ declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
 define ptr @test_v2i32_post_imm_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -7186,6 +7678,8 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7193,6 +7687,8 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -7206,13 +7702,17 @@ declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr)
 define ptr @test_v2i64_post_imm_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -7224,6 +7724,8 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7231,6 +7733,8 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -7244,13 +7748,17 @@ declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr)
 define ptr @test_v1i64_post_imm_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -7262,6 +7770,8 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7269,6 +7779,8 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -7282,13 +7794,17 @@ declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr)
 define ptr @test_v4f32_post_imm_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -7300,6 +7816,8 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.4s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7307,6 +7825,8 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -7320,13 +7840,17 @@ declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr)
 define ptr @test_v2f32_post_imm_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -7338,6 +7862,8 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st2.2s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7345,6 +7871,8 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st2.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -7358,13 +7886,17 @@ declare void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float>, <2 x float>, ptr)
 define ptr @test_v2f64_post_imm_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -7376,6 +7908,8 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.2d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7383,6 +7917,8 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -7396,13 +7932,17 @@ declare void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double>, <2 x double>, ptr)
 define ptr @test_v1f64_post_imm_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -7414,6 +7954,8 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7421,6 +7963,8 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -7434,13 +7978,19 @@ declare void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double>, <1 x double>, ptr)
 define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.16b { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.16b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -7451,13 +8001,19 @@ define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C
 define ptr @test_v16i8_post_reg_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.16b { v0, v1, v2 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.16b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -7471,13 +8027,19 @@ declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, pt
 define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.8b { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.8b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -7488,13 +8050,19 @@ define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <
 define ptr @test_v8i8_post_reg_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.8b { v0, v1, v2 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.8b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -7508,13 +8076,19 @@ declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)
 define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.8h { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.8h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -7525,14 +8099,20 @@ define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
 define ptr @test_v8i16_post_reg_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.8h { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.8h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -7546,13 +8126,19 @@ declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, pt
 define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.4h { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.4h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -7563,14 +8149,20 @@ define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
 define ptr @test_v4i16_post_reg_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.4h { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.4h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -7584,13 +8176,19 @@ declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, pt
 define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -7601,14 +8199,20 @@ define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
 define ptr @test_v4i32_post_reg_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -7622,13 +8226,19 @@ declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, pt
 define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -7639,14 +8249,20 @@ define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
 define ptr @test_v2i32_post_reg_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -7660,13 +8276,19 @@ declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, pt
 define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -7677,14 +8299,20 @@ define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
 define ptr @test_v2i64_post_reg_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -7698,13 +8326,19 @@ declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, pt
 define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -7715,14 +8349,20 @@ define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
 define ptr @test_v1i64_post_reg_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -7736,13 +8376,19 @@ declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, pt
 define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -7753,14 +8399,20 @@ define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
 define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.4s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -7774,13 +8426,19 @@ declare void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float>, <4 x float>, <4 x floa
 define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -7791,14 +8449,20 @@ define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
 define ptr @test_v2f32_post_reg_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st3.2s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st3.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -7812,13 +8476,19 @@ declare void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float>, <2 x float>, <2 x floa
 define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -7829,14 +8499,20 @@ define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
 define ptr @test_v2f64_post_reg_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.2d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -7850,13 +8526,19 @@ declare void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double>, <2 x double>, <2 x do
 define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -7867,14 +8549,20 @@ define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
 define ptr @test_v1f64_post_reg_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -7888,13 +8576,21 @@ declare void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double>, <1 x double>, <1 x do
 define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.16b { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.16b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -7905,13 +8601,21 @@ define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C
 define ptr @test_v16i8_post_reg_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.16b { v0, v1, v2, v3 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.16b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -7925,13 +8629,21 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <1
 define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.8b { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.8b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -7942,13 +8654,21 @@ define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <
 define ptr @test_v8i8_post_reg_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.8b { v0, v1, v2, v3 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.8b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -7962,13 +8682,21 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i
 define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.8h { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.8h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -7979,14 +8707,22 @@ define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C
 define ptr @test_v8i16_post_reg_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.8h { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.8h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -8000,13 +8736,21 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8
 define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.4h { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.4h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -8017,14 +8761,22 @@ define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C
 define ptr @test_v4i16_post_reg_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.4h { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.4h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -8038,13 +8790,21 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<4
 define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -8055,14 +8815,22 @@ define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C
 define ptr @test_v4i32_post_reg_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -8076,13 +8844,21 @@ declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<4
 define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -8093,14 +8869,22 @@ define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C
 define ptr @test_v2i32_post_reg_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -8114,13 +8898,21 @@ declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2
 define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -8131,14 +8923,22 @@ define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C
 define ptr @test_v2i64_post_reg_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -8152,13 +8952,21 @@ declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2
 define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -8169,14 +8977,22 @@ define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C
 define ptr @test_v1i64_post_reg_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -8190,13 +9006,21 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1
 define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -8207,14 +9031,22 @@ define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float
 define ptr @test_v4f32_post_reg_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.4s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -8228,13 +9060,21 @@ declare void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float>, <4 x float>, <4 x floa
 define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -8245,14 +9085,22 @@ define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float
 define ptr @test_v2f32_post_reg_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st4.2s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st4.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -8266,13 +9114,21 @@ declare void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float>, <2 x float>, <2 x floa
 define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -8283,14 +9139,22 @@ define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub
 define ptr @test_v2f64_post_reg_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.2d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -8304,13 +9168,21 @@ declare void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double>, <2 x double>, <2 x do
 define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -8321,14 +9193,22 @@ define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub
 define ptr @test_v1f64_post_reg_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -8342,13 +9222,17 @@ declare void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double>, <1 x double>, <1 x do
 define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.16b { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.16b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -8359,13 +9243,17 @@ define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8>
 define ptr @test_v16i8_post_reg_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.16b { v0, v1 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.16b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A)
@@ -8379,13 +9267,17 @@ declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr)
 define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.8b { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.8b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -8396,13 +9288,17 @@ define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C)
 define ptr @test_v8i8_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.8b { v0, v1 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.8b { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A)
@@ -8416,13 +9312,17 @@ declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)
 define ptr @test_v8i16_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.8h { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.8h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -8434,6 +9334,8 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.8h { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8441,6 +9343,8 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.8h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A)
@@ -8454,13 +9358,17 @@ declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr)
 define ptr @test_v4i16_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.4h { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.4h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -8472,6 +9380,8 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.4h { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8479,6 +9389,8 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.4h { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A)
@@ -8492,13 +9404,17 @@ declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr)
 define ptr @test_v4i32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -8510,6 +9426,8 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8517,6 +9435,8 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A)
@@ -8530,13 +9450,17 @@ declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr)
 define ptr @test_v2i32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -8548,6 +9472,8 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8555,6 +9481,8 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A)
@@ -8568,13 +9496,17 @@ declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr)
 define ptr @test_v2i64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -8586,6 +9518,8 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8593,6 +9527,8 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A)
@@ -8606,13 +9542,17 @@ declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr)
 define ptr @test_v1i64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -8624,6 +9564,8 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8631,6 +9573,8 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A)
@@ -8644,13 +9588,17 @@ declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr)
 define ptr @test_v4f32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -8662,6 +9610,8 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.4s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8669,6 +9619,8 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.4s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A)
@@ -8682,13 +9634,17 @@ declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr)
 define ptr @test_v2f32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -8700,6 +9656,8 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.2s { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8707,6 +9665,8 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.2s { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A)
@@ -8720,13 +9680,17 @@ declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr)
 define ptr @test_v2f64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -8738,6 +9702,8 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st1.2d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8745,6 +9711,8 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st1.2d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A)
@@ -8758,13 +9726,17 @@ declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr)
 define ptr @test_v1f64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x2:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x2:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -8776,6 +9748,8 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x2:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
 ; CHECK-SD-NEXT:    st1.1d { v0, v1 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -8783,6 +9757,8 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1
 ; CHECK-GI-NEXT:    st1.1d { v0, v1 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A)
@@ -8796,13 +9772,19 @@ declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr)
 define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.16b { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.16b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -8813,13 +9795,19 @@ define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8>
 define ptr @test_v16i8_post_reg_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.16b { v0, v1, v2 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.16b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A)
@@ -8833,13 +9821,19 @@ declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>,
 define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.8b { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.8b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -8850,13 +9844,19 @@ define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C,
 define ptr @test_v8i8_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.8b { v0, v1, v2 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.8b { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A)
@@ -8870,13 +9870,19 @@ declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)
 define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.8h { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.8h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -8887,14 +9893,20 @@ define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
 define ptr @test_v8i16_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.8h { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.8h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A)
@@ -8908,13 +9920,19 @@ declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>,
 define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.4h { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.4h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -8925,14 +9943,20 @@ define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
 define ptr @test_v4i16_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.4h { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.4h { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A)
@@ -8946,13 +9970,19 @@ declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,
 define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -8963,14 +9993,20 @@ define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
 define ptr @test_v4i32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A)
@@ -8984,13 +10020,19 @@ declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,
 define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -9001,14 +10043,20 @@ define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
 define ptr @test_v2i32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A)
@@ -9022,13 +10070,19 @@ declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>,
 define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -9039,14 +10093,20 @@ define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
 define ptr @test_v2i64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A)
@@ -9060,13 +10120,19 @@ declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,
 define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -9077,14 +10143,20 @@ define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
 define ptr @test_v1i64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A)
@@ -9098,13 +10170,19 @@ declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,
 define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -9115,14 +10193,20 @@ define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
 define ptr @test_v4f32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A)
@@ -9136,13 +10220,19 @@ declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x fl
 define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -9153,14 +10243,20 @@ define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
 define ptr @test_v2f32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A)
@@ -9174,13 +10270,19 @@ declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x fl
 define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], #48
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #48
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -9191,14 +10293,20 @@ define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
 define ptr @test_v2f64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A)
@@ -9212,13 +10320,19 @@ declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x
 define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x3:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -9229,14 +10343,20 @@ define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
 define ptr @test_v1f64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x3:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st1x3:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A)
@@ -9250,13 +10370,21 @@ declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x
 define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.16b { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.16b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -9267,13 +10395,21 @@ define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8>
 define ptr @test_v16i8_post_reg_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.16b { v0, v1, v2, v3 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.16b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A)
@@ -9287,13 +10423,21 @@ declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>,
 define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.8b { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.8b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -9304,13 +10448,21 @@ define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C,
 define ptr @test_v8i8_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.8b { v0, v1, v2, v3 }, [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.8b { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A)
@@ -9324,13 +10476,21 @@ declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x
 define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.8h { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.8h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -9341,14 +10501,22 @@ define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16>
 define ptr @test_v8i16_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.8h { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.8h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A)
@@ -9362,13 +10530,21 @@ declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>,
 define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.4h { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.4h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -9379,14 +10555,22 @@ define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16>
 define ptr @test_v4i16_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.4h { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.4h { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A)
@@ -9400,13 +10584,21 @@ declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<
 define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -9417,14 +10609,22 @@ define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32>
 define ptr @test_v4i32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A)
@@ -9438,13 +10638,21 @@ declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<
 define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -9455,14 +10663,22 @@ define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32>
 define ptr @test_v2i32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A)
@@ -9476,13 +10692,21 @@ declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>,
 define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -9493,14 +10717,22 @@ define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64>
 define ptr @test_v2i64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A)
@@ -9514,13 +10746,21 @@ declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<
 define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -9531,14 +10771,22 @@ define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64>
 define ptr @test_v1i64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A)
@@ -9552,13 +10800,21 @@ declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<
 define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -9569,14 +10825,22 @@ define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo
 define ptr @test_v4f32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.4s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.4s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A)
@@ -9590,13 +10854,21 @@ declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x fl
 define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -9607,14 +10879,22 @@ define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo
 define ptr @test_v2f32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.2s { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.2s { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A)
@@ -9628,13 +10908,21 @@ declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x fl
 define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], #64
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #64
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -9645,14 +10933,22 @@ define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x do
 define ptr @test_v2f64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st1.2d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st1.2d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A)
@@ -9666,13 +10962,21 @@ declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x
 define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -9683,14 +10987,22 @@ define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x do
 define ptr @test_v1f64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x4:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-SD-NEXT:    st1.1d { v0, v1, v2, v3 }, [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st1x4:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3
 ; CHECK-GI-NEXT:    st1.1d { v0, v1, v2, v3 }, [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A)
@@ -9703,13 +11015,17 @@ declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x
 define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], #2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -9720,13 +11036,17 @@ define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8
 define ptr @test_v16i8_post_reg_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A)
@@ -9740,13 +11060,17 @@ declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr)
 define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], #2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -9757,13 +11081,17 @@ define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %
 define ptr @test_v8i8_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.b { v0, v1 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.b { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A)
@@ -9777,13 +11105,17 @@ declare void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr)
 define ptr @test_v8i16_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #4
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -9795,6 +11127,8 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9802,6 +11136,8 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A)
@@ -9815,13 +11151,17 @@ declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr)
 define ptr @test_v4i16_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #4
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -9833,6 +11173,8 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.h { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9840,6 +11182,8 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.h { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A)
@@ -9853,13 +11197,17 @@ declare void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr)
 define ptr @test_v4i32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -9871,6 +11219,8 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9878,6 +11228,8 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A)
@@ -9891,13 +11243,17 @@ declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr)
 define ptr @test_v2i32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -9909,6 +11265,8 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9916,6 +11274,8 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A)
@@ -9929,13 +11289,17 @@ declare void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr)
 define ptr @test_v2i64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -9947,6 +11311,8 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9954,6 +11320,8 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A)
@@ -9967,13 +11335,17 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr)
 define ptr @test_v1i64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -9985,6 +11357,8 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -9992,6 +11366,8 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A)
@@ -10005,13 +11381,17 @@ declare void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr)
 define ptr @test_v4f32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -10023,6 +11403,8 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -10030,6 +11412,8 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A)
@@ -10043,13 +11427,17 @@ declare void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float>, <4 x float>, i64,
 define ptr @test_v2f32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -10061,6 +11449,8 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.s { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -10068,6 +11458,8 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.s { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A)
@@ -10081,13 +11473,17 @@ declare void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float>, <2 x float>, i64,
 define ptr @test_v2f64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -10099,6 +11495,8 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -10106,6 +11504,8 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A)
@@ -10119,13 +11519,17 @@ declare void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double>, <2 x double>, i64
 define ptr @test_v1f64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st2lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st2lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -10137,6 +11541,8 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st2lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    st2.d { v0, v1 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -10144,6 +11550,8 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    st2.d { v0, v1 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A)
@@ -10157,13 +11565,19 @@ declare void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double>, <1 x double>, i64
 define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], #3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -10174,13 +11588,19 @@ define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8
 define ptr @test_v16i8_post_reg_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A)
@@ -10194,13 +11614,19 @@ declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>
 define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], #3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -10211,13 +11637,19 @@ define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %
 define ptr @test_v8i8_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.b { v0, v1, v2 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.b { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A)
@@ -10231,13 +11663,19 @@ declare void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i6
 define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], #6
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #6
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -10248,14 +11686,20 @@ define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
 define ptr @test_v8i16_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A)
@@ -10269,13 +11713,19 @@ declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>
 define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], #6
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #6
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -10286,14 +11736,20 @@ define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
 define ptr @test_v4i16_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.h { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.h { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A)
@@ -10307,13 +11763,19 @@ declare void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>
 define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #12
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -10324,14 +11786,20 @@ define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
 define ptr @test_v4i32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A)
@@ -10345,13 +11813,19 @@ declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>
 define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #12
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -10362,14 +11836,20 @@ define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
 define ptr @test_v2i32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A)
@@ -10383,13 +11863,19 @@ declare void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>
 define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -10400,14 +11886,20 @@ define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
 define ptr @test_v2i64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A)
@@ -10421,13 +11913,19 @@ declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>
 define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -10438,14 +11936,20 @@ define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
 define ptr @test_v1i64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A)
@@ -10459,13 +11963,19 @@ declare void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>
 define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #12
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -10476,14 +11986,20 @@ define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
 define ptr @test_v4f32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A)
@@ -10497,13 +12013,19 @@ declare void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x
 define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], #12
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #12
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -10514,14 +12036,20 @@ define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
 define ptr @test_v2f32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.s { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.s { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A)
@@ -10535,13 +12063,19 @@ declare void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x
 define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -10552,14 +12086,20 @@ define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
 define ptr @test_v2f64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A)
@@ -10573,13 +12113,19 @@ declare void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double>, <2 x double>, <2
 define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], #24
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st3lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #24
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -10590,14 +12136,20 @@ define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
 define ptr @test_v1f64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st3lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-SD-NEXT:    st3.d { v0, v1, v2 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st3lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    mov x8, x0
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-GI-NEXT:    st3.d { v0, v1, v2 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A)
@@ -10611,13 +12163,21 @@ declare void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double>, <1 x double>, <1
 define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #4
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -10628,13 +12188,21 @@ define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8
 define ptr @test_v16i8_post_reg_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v16i8_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v16i8_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A)
@@ -10648,13 +12216,21 @@ declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>
 define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], #4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #4
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -10665,13 +12241,21 @@ define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %
 define ptr @test_v8i8_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x0], x2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.b { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A)
@@ -10685,13 +12269,21 @@ declare void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8
 define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -10702,14 +12294,22 @@ define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16
 define ptr @test_v8i16_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v8i16_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i16_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A)
@@ -10723,13 +12323,21 @@ declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>
 define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], #8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #8
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -10740,14 +12348,22 @@ define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16
 define ptr @test_v4i16_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i16_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.h { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A)
@@ -10761,13 +12377,21 @@ declare void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>
 define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -10778,14 +12402,22 @@ define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32
 define ptr @test_v4i32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4i32_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i32_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A)
@@ -10799,13 +12431,21 @@ declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>
 define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -10816,14 +12456,22 @@ define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32
 define ptr @test_v2i32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i32_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A)
@@ -10837,13 +12485,21 @@ declare void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>
 define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -10854,14 +12510,22 @@ define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64
 define ptr @test_v2i64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2i64_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i64_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A)
@@ -10875,13 +12539,21 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>
 define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -10892,14 +12564,22 @@ define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64
 define ptr @test_v1i64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1i64_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1i64_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A)
@@ -10913,13 +12593,21 @@ declare void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>
 define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -10930,14 +12618,22 @@ define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f
 define ptr @test_v4f32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v4f32_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4f32_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A)
@@ -10951,13 +12647,21 @@ declare void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x
 define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], #16
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #16
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -10968,14 +12672,22 @@ define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f
 define ptr @test_v2f32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f32_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.s { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A)
@@ -10989,13 +12701,21 @@ declare void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x
 define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -11006,14 +12726,22 @@ define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x
 define ptr @test_v2f64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v2f64_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f64_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A)
@@ -11027,13 +12755,21 @@ declare void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double>, <2 x double>, <2
 define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_imm_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], #32
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_imm_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, #32
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -11044,14 +12780,22 @@ define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x
 define ptr @test_v1f64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
 ; CHECK-SD-LABEL: test_v1f64_post_reg_st4lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    lsl x8, x2, #3
+; CHECK-SD-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x0], x8
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v1f64_post_reg_st4lane:
 ; CHECK-GI:       ; %bb.0:
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    mov x8, x0
 ; CHECK-GI-NEXT:    add x0, x0, x2, lsl #3
+; CHECK-GI-NEXT:    ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-GI-NEXT:    st4.d { v0, v1, v2, v3 }[0], [x8]
 ; CHECK-GI-NEXT:    ret
   call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A)
@@ -11597,15 +13341,19 @@ define <16 x i8> @test_v16i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <16
 define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
 ; CHECK-SD-LABEL: test_v8i8_post_imm_ld1lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], #1
 ; CHECK-SD-NEXT:    str x0, [x1]
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_imm_ld1lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    ldrb w8, [x0], #1
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str x0, [x1]
 ; CHECK-GI-NEXT:    mov.b v0[1], w8
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
@@ -11617,16 +13365,20 @@ define <8 x i8> @test_v8i8_post_imm_ld1lane(ptr %bar, ptr %ptr, <8 x i8> %A) {
 define <8 x i8> @test_v8i8_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x i8> %A) {
 ; CHECK-SD-LABEL: test_v8i8_post_reg_ld1lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.b { v0 }[1], [x0], x2
 ; CHECK-SD-NEXT:    str x0, [x1]
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v8i8_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    ldr b1, [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    mov.b v0[1], v1[0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i8, ptr %bar
   %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
@@ -11679,15 +13431,19 @@ define <8 x i16> @test_v8i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <8 x
 define <4 x i16> @test_v4i16_post_imm_ld1lane(ptr %bar, ptr %ptr, <4 x i16> %A) {
 ; CHECK-SD-LABEL: test_v4i16_post_imm_ld1lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], #2
 ; CHECK-SD-NEXT:    str x0, [x1]
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_imm_ld1lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    ldrh w8, [x0], #2
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str x0, [x1]
 ; CHECK-GI-NEXT:    mov.h v0[1], w8
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i16, ptr %bar
   %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
@@ -11700,14 +13456,18 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
 ; CHECK-SD-LABEL: test_v4i16_post_reg_ld1lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i16, ptr %bar
@@ -11761,15 +13521,19 @@ define <4 x i32> @test_v4i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4 x
 define <2 x i32> @test_v2i32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x i32> %A) {
 ; CHECK-SD-LABEL: test_v2i32_post_imm_ld1lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_imm_ld1lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    ldr w8, [x0], #4
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str x0, [x1]
 ; CHECK-GI-NEXT:    mov.s v0[1], w8
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i32, ptr %bar
   %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
@@ -11782,14 +13546,18 @@ define <2 x i32> @test_v2i32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2 x
 ; CHECK-SD-LABEL: test_v2i32_post_reg_ld1lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2i32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load i32, ptr %bar
@@ -11884,15 +13652,19 @@ define <4 x float> @test_v4f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <4
 define <2 x float> @test_v2f32_post_imm_ld1lane(ptr %bar, ptr %ptr, <2 x float> %A) {
 ; CHECK-SD-LABEL: test_v2f32_post_imm_ld1lane:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], #4
 ; CHECK-SD-NEXT:    str x0, [x1]
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_imm_ld1lane:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    ldr s1, [x0], #4
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str x0, [x1]
 ; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load float, ptr %bar
   %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
@@ -11905,14 +13677,18 @@ define <2 x float> @test_v2f32_post_reg_ld1lane(ptr %bar, ptr %ptr, i64 %inc, <2
 ; CHECK-SD-LABEL: test_v2f32_post_reg_ld1lane:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #2
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.s { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_v2f32_post_reg_ld1lane:
 ; CHECK-GI:       ; %bb.0:
-; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #2
+; CHECK-GI-NEXT:    ld1.s { v0 }[1], [x0]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    str x8, [x1]
 ; CHECK-GI-NEXT:    ret
   %tmp1 = load float, ptr %bar
@@ -11994,9 +13770,11 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr,
 ; CHECK-SD-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
 ; CHECK-SD:       ; %bb.0:
 ; CHECK-SD-NEXT:    lsl x8, x2, #1
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.h { v0 }[1], [x0], x8
 ; CHECK-SD-NEXT:    str x0, [x1]
 ; CHECK-SD-NEXT:    ldr d1, [x3]
+; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    cnt.8b v1, v1
 ; CHECK-SD-NEXT:    uaddlp.4h v1, v1
 ; CHECK-SD-NEXT:    uaddlp.2s v1, v1
@@ -12006,8 +13784,10 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(ptr %bar, ptr %ptr,
 ; CHECK-GI-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
 ; CHECK-GI:       ; %bb.0:
 ; CHECK-GI-NEXT:    add x8, x0, x2, lsl #1
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ld1.h { v0 }[1], [x0]
 ; CHECK-GI-NEXT:    str x8, [x1]
+; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ldr d1, [x3]
 ; CHECK-GI-NEXT:    cnt.8b v1, v1
 ; CHECK-GI-NEXT:    uaddlp.4h v1, v1
@@ -12200,6 +13980,7 @@ define i8 @load_single_extract_variable_index_i8(ptr %A, i32 %idx) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    ldr q0, [x0]
+; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    bfxil x8, x1, #0, #4
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldrb w0, [x8]
@@ -12224,6 +14005,7 @@ define i16 @load_single_extract_variable_index_i16(ptr %A, i32 %idx) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    ldr q0, [x0]
+; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    bfi x8, x1, #1, #3
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldrh w0, [x8]
@@ -12244,6 +14026,7 @@ define i16 @load_single_extract_variable_index_i16(ptr %A, i32 %idx) {
 define i32 @load_single_extract_variable_index_i32(ptr %A, i32 %idx) {
 ; CHECK-SD-LABEL: load_single_extract_variable_index_i32:
 ; CHECK-SD:       ; %bb.0:
+; CHECK-SD-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x8, x1, #0x3
 ; CHECK-SD-NEXT:    ldr w0, [x0, x8, lsl #2]
 ; CHECK-SD-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
index 3b1ff45e19d7ab..2816f91df44ddc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
@@ -40,6 +40,7 @@ define i32 @foo(ptr %ptr, i32 %x, i64 %y) !dbg !3 {
 ; CHECK-NEXT:    mov w9, #10 ; =0xa
 ; CHECK-NEXT:    str w9, [x8]
 ; CHECK-NEXT:  LBB0_2: ; %common.ret
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %l = load i32, ptr %ptr, !dbg !4

diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index cb5a9748e620dd..eaa545473b2e0e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -351,30 +351,63 @@ declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr) nounwi
 
 define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld2lane_16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld2.b { v0, v1 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld2lane_16b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ld2.b { v0, v1 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld2lane_16b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.b { v0, v1 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int8x16x2_t  %tmp2
 }
 
 define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld3lane_16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld3lane_16b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld3lane_16b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.b { v0, v1, v2 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int8x16x3_t  %tmp2
 }
 
 define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld4lane_16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld4lane_16b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld4lane_16b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.b { v0, v1, v2, v3 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int8x16x4_t  %tmp2
 }
@@ -385,30 +418,63 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8>
 
 define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld2lane_8h:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld2.h { v0, v1 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld2lane_8h:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ld2.h { v0, v1 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld2lane_8h:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.h { v0, v1 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int16x8x2_t  %tmp2
 }
 
 define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld3lane_8h:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld3lane_8h:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld3lane_8h:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.h { v0, v1, v2 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int16x8x3_t  %tmp2
 }
 
 define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld4lane_8h:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld4lane_8h:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld4lane_8h:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.h { v0, v1, v2, v3 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int16x8x4_t  %tmp2
 }
@@ -419,30 +485,63 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16>
 
 define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld2lane_4s:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld2.s { v0, v1 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld2lane_4s:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ld2.s { v0, v1 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld2lane_4s:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.s { v0, v1 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int32x4x2_t  %tmp2
 }
 
 define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld3lane_4s:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld3lane_4s:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld3lane_4s:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.s { v0, v1, v2 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int32x4x3_t  %tmp2
 }
 
 define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld4lane_4s:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld4lane_4s:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld4lane_4s:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.s { v0, v1, v2, v3 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int32x4x4_t  %tmp2
 }
@@ -453,30 +552,63 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32>
 
 define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld2lane_2d:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld2.d { v0, v1 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld2lane_2d:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ld2.d { v0, v1 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld2lane_2d:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ld2.d { v0, v1 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A)
 	ret %struct.__neon_int64x2x2_t  %tmp2
 }
 
 define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld3lane_2d:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld3lane_2d:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld3lane_2d:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    ld3.d { v0, v1, v2 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A)
 	ret %struct.__neon_int64x2x3_t  %tmp2
 }
 
 define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: ld4lane_2d:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: ld4lane_2d:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: ld4lane_2d:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    ld4.d { v0, v1, v2, v3 }[1], [x0]
+; CHECK-GI-NEXT:    ret
 	%tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A)
 	ret %struct.__neon_int64x2x4_t  %tmp2
 }
@@ -956,13 +1088,17 @@ define <1 x i64> @ld1_1d(ptr %p) {
 define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
 ; CHECK-SD-LABEL: ld1_8b:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ld1.b { v0 }[0], [x0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: ld1_8b:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr b1, [x0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov.b v0[0], v1[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i8, ptr %bar
@@ -973,7 +1109,9 @@ define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
 define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
 ; CHECK-LABEL: ld1_4h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1.h { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i16, ptr %bar
@@ -984,7 +1122,9 @@ define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
 define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
 ; CHECK-LABEL: ld1_2s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load i32, ptr %bar
@@ -995,7 +1135,9 @@ define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
 define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
 ; CHECK-LABEL: ld1_2s_float:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1.s { v0 }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 ; Make sure we are using the operands defined by the ABI
   %tmp1 = load float, ptr %bar

diff --git a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
index ddacb5a7647da8..b498611242d469 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
@@ -132,6 +132,7 @@ declare i64 @llvm.aarch64.ldxr.p0(ptr) nounwind
 define dso_local i32 @test_store_i8(i32, i8 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stxrb w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i8 %val to i64
@@ -143,6 +144,7 @@ define dso_local i32 @test_store_i8(i32, i8 %val, ptr %addr) {
 define dso_local i32 @test_store_i16(i32, i16 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stxrh w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i16 %val to i64
@@ -310,6 +312,7 @@ declare i64 @llvm.aarch64.ldaxr.p0(ptr) nounwind
 define dso_local i32 @test_store_release_i8(i32, i8 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_release_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stlxrb w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i8 %val to i64
@@ -321,6 +324,7 @@ define dso_local i32 @test_store_release_i8(i32, i8 %val, ptr %addr) {
 define dso_local i32 @test_store_release_i16(i32, i16 %val, ptr %addr) {
 ; CHECK-LABEL: test_store_release_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    stlxrh w0, w1, [x2]
 ; CHECK-NEXT:    ret
   %extval = zext i16 %val to i64

diff --git a/llvm/test/CodeGen/AArch64/arm64-mul.ll b/llvm/test/CodeGen/AArch64/arm64-mul.ll
index f39869ef0ed261..5ae2722693f898 100644
--- a/llvm/test/CodeGen/AArch64/arm64-mul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-mul.ll
@@ -138,6 +138,7 @@ entry:
 define i64 @t10(i32 %a) nounwind {
 ; CHECK-LABEL: t10:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    mov w9, #2 // =0x2
 ; CHECK-NEXT:    movk w9, #32768, lsl #16

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem-high.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
index b94629ebff3b9d..f1678ca19f47d5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem-high.ll
@@ -515,6 +515,7 @@ entry:
 define <2 x float> @test_vmul_n_f32(<2 x float> %a, float %b) #0 {
 ; CHECK-LABEL: test_vmul_n_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -527,6 +528,7 @@ entry:
 define <4 x float> @test_vmulq_n_f32(<4 x float> %a, float %b) #0 {
 ; CHECK-LABEL: test_vmulq_n_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -541,6 +543,7 @@ entry:
 define <2 x double> @test_vmulq_n_f64(<2 x double> %a, double %b) #0 {
 ; CHECK-LABEL: test_vmulq_n_f64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -553,6 +556,7 @@ entry:
 define <2 x float> @test_vfma_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfma_n_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -565,6 +569,7 @@ entry:
 define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfmaq_n_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmla v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -579,6 +584,7 @@ entry:
 define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfms_n_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmls v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -592,6 +598,7 @@ entry:
 define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
 ; CHECK-LABEL: test_vfmsq_n_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmls v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
index d5d3e807d975ff..cb87ba9a4ed6c2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -65,6 +65,7 @@ declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
 define <4 x i16> @test_vmla_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmla_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.4h, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -77,6 +78,7 @@ entry:
 define <8 x i16> @test_vmlaq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlaq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.8h, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -89,6 +91,7 @@ entry:
 define <2 x i32> @test_vmla_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmla_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -101,6 +104,7 @@ entry:
 define <4 x i32> @test_vmlaq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlaq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.4s, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -161,6 +165,7 @@ entry:
 define <4 x i16> @test_vmls_lane_s16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmls_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.4h, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -173,6 +178,7 @@ entry:
 define <8 x i16> @test_vmlsq_lane_s16(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.8h, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -185,6 +191,7 @@ entry:
 define <2 x i32> @test_vmls_lane_s32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmls_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -197,6 +204,7 @@ entry:
 define <4 x i32> @test_vmlsq_lane_s32(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.4s, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -257,6 +265,7 @@ entry:
 define <4 x i16> @test_vmul_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmul_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -268,6 +277,7 @@ entry:
 define <8 x i16> @test_vmulq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmulq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -279,6 +289,7 @@ entry:
 define <2 x i32> @test_vmul_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmul_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -290,6 +301,7 @@ entry:
 define <4 x i32> @test_vmulq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmulq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -301,6 +313,7 @@ entry:
 define <4 x i16> @test_vmul_lane_u16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmul_lane_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -312,6 +325,7 @@ entry:
 define <8 x i16> @test_vmulq_lane_u16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmulq_lane_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -323,6 +337,7 @@ entry:
 define <2 x i32> @test_vmul_lane_u32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmul_lane_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -334,6 +349,7 @@ entry:
 define <4 x i32> @test_vmulq_lane_u32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmulq_lane_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -433,6 +449,7 @@ entry:
 define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfma_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -446,6 +463,7 @@ declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
 define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmaq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.4s, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -481,6 +499,7 @@ entry:
 define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfms_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -493,6 +512,7 @@ entry:
 define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmsq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.4s, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -529,6 +549,7 @@ entry:
 define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
 ; CHECK-LABEL: test_vfmaq_lane_f64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.2d, v1.2d, v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -553,6 +574,7 @@ entry:
 define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) {
 ; CHECK-LABEL: test_vfmsq_lane_f64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.2d, v1.2d, v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -604,6 +626,7 @@ declare double @llvm.fma.f64(double, double, double)
 define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmss_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -652,6 +675,7 @@ entry:
 define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmss_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -688,6 +712,7 @@ entry:
 define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -700,6 +725,7 @@ entry:
 define <2 x i64> @test_vmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -736,6 +762,7 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -749,6 +776,7 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -788,6 +816,7 @@ entry:
 define <4 x i32> @test_vmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -800,6 +829,7 @@ entry:
 define <2 x i64> @test_vmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -836,6 +866,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -849,6 +880,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -888,6 +920,7 @@ entry:
 define <4 x i32> @test_vmlal_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_lane_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -900,6 +933,7 @@ entry:
 define <2 x i64> @test_vmlal_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_lane_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -936,6 +970,7 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -949,6 +984,7 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -988,6 +1024,7 @@ entry:
 define <4 x i32> @test_vmlsl_lane_u16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1000,6 +1037,7 @@ entry:
 define <2 x i64> @test_vmlsl_lane_u32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1036,6 +1074,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_u16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1049,6 +1088,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_u32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1088,6 +1128,7 @@ entry:
 define <4 x i32> @test_vmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull v0.4s, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1099,6 +1140,7 @@ entry:
 define <2 x i64> @test_vmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull v0.2d, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1110,6 +1152,7 @@ entry:
 define <4 x i32> @test_vmull_lane_u16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_lane_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull v0.4s, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1121,6 +1164,7 @@ entry:
 define <2 x i64> @test_vmull_lane_u32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_lane_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull v0.2d, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1132,6 +1176,7 @@ entry:
 define <4 x i32> @test_vmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull2 v0.4s, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1144,6 +1189,7 @@ entry:
 define <2 x i64> @test_vmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull2 v0.2d, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1156,6 +1202,7 @@ entry:
 define <4 x i32> @test_vmull_high_lane_u16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull2 v0.4s, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1168,6 +1215,7 @@ entry:
 define <2 x i64> @test_vmull_high_lane_u32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull2 v0.2d, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1272,6 +1320,7 @@ entry:
 define <4 x i32> @test_vqdmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlal_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1284,6 +1333,7 @@ entry:
 define <2 x i64> @test_vqdmlal_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlal_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1296,6 +1346,7 @@ entry:
 define <4 x i32> @test_vqdmlal_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlal_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1309,6 +1360,7 @@ entry:
 define <2 x i64> @test_vqdmlal_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlal_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1322,6 +1374,7 @@ entry:
 define <4 x i32> @test_vqdmlsl_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlsl_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl v0.4s, v1.4h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1334,6 +1387,7 @@ entry:
 define <2 x i64> @test_vqdmlsl_lane_s32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlsl_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl v0.2d, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1346,6 +1400,7 @@ entry:
 define <4 x i32> @test_vqdmlsl_high_lane_s16(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl2 v0.4s, v1.8h, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1359,6 +1414,7 @@ entry:
 define <2 x i64> @test_vqdmlsl_high_lane_s32(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl2 v0.2d, v1.4s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1372,6 +1428,7 @@ entry:
 define <4 x i32> @test_vqdmull_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmull_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull v0.4s, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1383,6 +1440,7 @@ entry:
 define <2 x i64> @test_vqdmull_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmull_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull v0.2d, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1416,6 +1474,7 @@ entry:
 define <4 x i32> @test_vqdmull_high_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmull_high_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull2 v0.4s, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1428,6 +1487,7 @@ entry:
 define <2 x i64> @test_vqdmull_high_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmull_high_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull2 v0.2d, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1464,6 +1524,7 @@ entry:
 define <4 x i16> @test_vqdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1475,6 +1536,7 @@ entry:
 define <4 x i16> @test_vqdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s16_intrinsic:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1505,6 +1567,7 @@ entry:
 define <8 x i16> @test_vqdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1516,6 +1579,7 @@ entry:
 define <8 x i16> @test_vqdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s16_intrinsic:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1546,6 +1610,7 @@ entry:
 define <2 x i32> @test_vqdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1557,6 +1622,7 @@ entry:
 define <2 x i32> @test_vqdmulh_lane_s32_intrinsic(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s32_intrinsic:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1587,6 +1653,7 @@ entry:
 define <4 x i32> @test_vqdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1598,6 +1665,7 @@ entry:
 define <4 x i32> @test_vqdmulhq_lane_s32_intrinsic(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s32_intrinsic:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1628,6 +1696,7 @@ entry:
 define <4 x i16> @test_vqrdmulh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1639,6 +1708,7 @@ entry:
 define <4 x i16> @test_vqrdmulh_lane_s16_intrinsic(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s16_intrinsic:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1669,6 +1739,7 @@ entry:
 define <8 x i16> @test_vqrdmulhq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1680,6 +1751,7 @@ entry:
 define <8 x i16> @test_vqrdmulhq_lane_s16_intrinsic(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s16_intrinsic:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -1710,6 +1782,7 @@ entry:
 define <2 x i32> @test_vqrdmulh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1721,6 +1794,7 @@ entry:
 define <2 x i32> @test_vqrdmulh_lane_s32_intrinsic(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s32_intrinsic:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1751,6 +1825,7 @@ entry:
 define <4 x i32> @test_vqrdmulhq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1762,6 +1837,7 @@ entry:
 define <4 x i32> @test_vqrdmulhq_lane_s32_intrinsic(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s32_intrinsic:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1792,6 +1868,7 @@ entry:
 define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmul_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1817,6 +1894,7 @@ entry:
 define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1828,6 +1906,7 @@ entry:
 define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) {
 ; CHECK-LABEL: test_vmulq_lane_f64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1968,6 +2047,7 @@ entry:
 define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulx_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.2s, v0.2s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1979,6 +2059,7 @@ entry:
 define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.4s, v0.4s, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -2003,6 +2084,7 @@ entry:
 define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.2d, v0.2d, v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2060,6 +2142,7 @@ entry:
 define <4 x i16> @test_vmla_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmla_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2072,6 +2155,7 @@ entry:
 define <8 x i16> @test_vmlaq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlaq_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2084,6 +2168,7 @@ entry:
 define <2 x i32> @test_vmla_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmla_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2096,6 +2181,7 @@ entry:
 define <4 x i32> @test_vmlaq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlaq_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mla v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2156,6 +2242,7 @@ entry:
 define <4 x i16> @test_vmls_lane_s16_0(<4 x i16> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmls_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2168,6 +2255,7 @@ entry:
 define <8 x i16> @test_vmlsq_lane_s16_0(<8 x i16> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsq_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2180,6 +2268,7 @@ entry:
 define <2 x i32> @test_vmls_lane_s32_0(<2 x i32> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmls_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2192,6 +2281,7 @@ entry:
 define <4 x i32> @test_vmlsq_lane_s32_0(<4 x i32> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsq_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mls v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2252,6 +2342,7 @@ entry:
 define <4 x i16> @test_vmul_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmul_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2263,6 +2354,7 @@ entry:
 define <8 x i16> @test_vmulq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmulq_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2274,6 +2366,7 @@ entry:
 define <2 x i32> @test_vmul_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmul_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2285,6 +2378,7 @@ entry:
 define <4 x i32> @test_vmulq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmulq_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2296,6 +2390,7 @@ entry:
 define <4 x i16> @test_vmul_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmul_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4h, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2307,6 +2402,7 @@ entry:
 define <8 x i16> @test_vmulq_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmulq_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.8h, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2318,6 +2414,7 @@ entry:
 define <2 x i32> @test_vmul_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmul_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2329,6 +2426,7 @@ entry:
 define <4 x i32> @test_vmulq_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmulq_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2428,6 +2526,7 @@ entry:
 define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfma_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2439,6 +2538,7 @@ entry:
 define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmaq_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2472,6 +2572,7 @@ entry:
 define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfms_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.2s, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2484,6 +2585,7 @@ entry:
 define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) {
 ; CHECK-LABEL: test_vfmsq_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.4s, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2543,6 +2645,7 @@ entry:
 define <4 x i32> @test_vmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2555,6 +2658,7 @@ entry:
 define <2 x i64> @test_vmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2591,6 +2695,7 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2604,6 +2709,7 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2643,6 +2749,7 @@ entry:
 define <4 x i32> @test_vmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2655,6 +2762,7 @@ entry:
 define <2 x i64> @test_vmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2691,6 +2799,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2704,6 +2813,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlsl2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2743,6 +2853,7 @@ entry:
 define <4 x i32> @test_vmlal_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2755,6 +2866,7 @@ entry:
 define <2 x i64> @test_vmlal_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2791,6 +2903,7 @@ entry:
 define <4 x i32> @test_vmlal_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2804,6 +2917,7 @@ entry:
 define <2 x i64> @test_vmlal_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlal_high_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2843,6 +2957,7 @@ entry:
 define <4 x i32> @test_vmlsl_lane_u16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2855,6 +2970,7 @@ entry:
 define <2 x i64> @test_vmlsl_lane_u32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2891,6 +3007,7 @@ entry:
 define <4 x i32> @test_vmlsl_high_lane_u16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2904,6 +3021,7 @@ entry:
 define <2 x i64> @test_vmlsl_high_lane_u32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmlsl_high_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlsl2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2943,6 +3061,7 @@ entry:
 define <4 x i32> @test_vmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull v0.4s, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2954,6 +3073,7 @@ entry:
 define <2 x i64> @test_vmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull v0.2d, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2965,6 +3085,7 @@ entry:
 define <4 x i32> @test_vmull_lane_u16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull v0.4s, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2976,6 +3097,7 @@ entry:
 define <2 x i64> @test_vmull_lane_u32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull v0.2d, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2987,6 +3109,7 @@ entry:
 define <4 x i32> @test_vmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull2 v0.4s, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2999,6 +3122,7 @@ entry:
 define <2 x i64> @test_vmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull2 v0.2d, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3011,6 +3135,7 @@ entry:
 define <4 x i32> @test_vmull_high_lane_u16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_u16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull2 v0.4s, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3023,6 +3148,7 @@ entry:
 define <2 x i64> @test_vmull_high_lane_u32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vmull_high_lane_u32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull2 v0.2d, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3127,6 +3253,7 @@ entry:
 define <4 x i32> @test_vqdmlal_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlal_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3139,6 +3266,7 @@ entry:
 define <2 x i64> @test_vqdmlal_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlal_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3151,6 +3279,7 @@ entry:
 define <4 x i32> @test_vqdmlal_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlal_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3164,6 +3293,7 @@ entry:
 define <2 x i64> @test_vqdmlal_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlal_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlal2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3177,6 +3307,7 @@ entry:
 define <4 x i32> @test_vqdmlsl_lane_s16_0(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlsl_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl v0.4s, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3189,6 +3320,7 @@ entry:
 define <2 x i64> @test_vqdmlsl_lane_s32_0(<2 x i64> %a, <2 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlsl_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl v0.2d, v1.2s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3201,6 +3333,7 @@ entry:
 define <4 x i32> @test_vqdmlsl_high_lane_s16_0(<4 x i32> %a, <8 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmlsl_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl2 v0.4s, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3214,6 +3347,7 @@ entry:
 define <2 x i64> @test_vqdmlsl_high_lane_s32_0(<2 x i64> %a, <4 x i32> %b, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmlsl_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqdmlsl2 v0.2d, v1.4s, v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3227,6 +3361,7 @@ entry:
 define <4 x i32> @test_vqdmull_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmull_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull v0.4s, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3238,6 +3373,7 @@ entry:
 define <2 x i64> @test_vqdmull_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmull_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull v0.2d, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3271,6 +3407,7 @@ entry:
 define <4 x i32> @test_vqdmull_high_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmull_high_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull2 v0.4s, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3283,6 +3420,7 @@ entry:
 define <2 x i64> @test_vqdmull_high_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmull_high_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull2 v0.2d, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3319,6 +3457,7 @@ entry:
 define <4 x i16> @test_vqdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4h, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3330,6 +3469,7 @@ entry:
 define <8 x i16> @test_vqdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.8h, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3341,6 +3481,7 @@ entry:
 define <2 x i32> @test_vqdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulh_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3352,6 +3493,7 @@ entry:
 define <4 x i32> @test_vqdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqdmulhq_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3363,6 +3505,7 @@ entry:
 define <4 x i16> @test_vqrdmulh_lane_s16_0(<4 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3374,6 +3517,7 @@ entry:
 define <8 x i16> @test_vqrdmulhq_lane_s16_0(<8 x i16> %a, <4 x i16> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.8h, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3385,6 +3529,7 @@ entry:
 define <2 x i32> @test_vqrdmulh_lane_s32_0(<2 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulh_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3396,6 +3541,7 @@ entry:
 define <4 x i32> @test_vqrdmulhq_lane_s32_0(<4 x i32> %a, <2 x i32> %v) {
 ; CHECK-LABEL: test_vqrdmulhq_lane_s32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3407,6 +3553,7 @@ entry:
 define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmul_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3418,6 +3565,7 @@ entry:
 define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulq_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3476,6 +3624,7 @@ entry:
 define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulx_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3487,6 +3636,7 @@ entry:
 define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.4s, v0.4s, v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3498,6 +3648,7 @@ entry:
 define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) {
 ; CHECK-LABEL: test_vmulxq_lane_f64_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.2d, v0.2d, v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -3585,6 +3736,7 @@ entry:
 define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_a57(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="cortex-a57" {
 ; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_a57:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -3596,6 +3748,7 @@ entry:
 define <2 x float> @test_vfma_lane_simdinstr_opt_pass_caching_m3(<2 x float> %a, <2 x float> %b, <2 x float> %v) "target-cpu"="exynos-m3" {
 ; CHECK-LABEL: test_vfma_lane_simdinstr_opt_pass_caching_m3:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
index 2c38ec4bfaf133..79645e32074c89 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -1049,6 +1049,7 @@ entry:
 define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vaddhn_high_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1065,6 +1066,7 @@ entry:
 define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vaddhn_high_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1081,6 +1083,7 @@ entry:
 define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vaddhn_high_s64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1097,6 +1100,7 @@ entry:
 define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vaddhn_high_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1113,6 +1117,7 @@ entry:
 define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vaddhn_high_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1129,6 +1134,7 @@ entry:
 define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vaddhn_high_u64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    addhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1205,6 +1211,7 @@ entry:
 define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vraddhn_high_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1219,6 +1226,7 @@ entry:
 define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vraddhn_high_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1233,6 +1241,7 @@ entry:
 define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vraddhn_high_s64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1247,6 +1256,7 @@ entry:
 define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vraddhn_high_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1261,6 +1271,7 @@ entry:
 define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vraddhn_high_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1275,6 +1286,7 @@ entry:
 define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vraddhn_high_u64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    raddhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1361,6 +1373,7 @@ entry:
 define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vsubhn_high_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1377,6 +1390,7 @@ entry:
 define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vsubhn_high_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1393,6 +1407,7 @@ entry:
 define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vsubhn_high_s64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1409,6 +1424,7 @@ entry:
 define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vsubhn_high_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1425,6 +1441,7 @@ entry:
 define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vsubhn_high_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1441,6 +1458,7 @@ entry:
 define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vsubhn_high_u64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    subhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1517,6 +1535,7 @@ entry:
 define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1531,6 +1550,7 @@ entry:
 define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1545,6 +1565,7 @@ entry:
 define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_s64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -1559,6 +1580,7 @@ entry:
 define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -1573,6 +1595,7 @@ entry:
 define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -1587,6 +1610,7 @@ entry:
 define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vrsubhn_high_u64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rsubhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
index 8f2338706c8177..17fb312c63754d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -158,6 +158,7 @@ define <4 x i16> @addp_v8i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    add v0.8h, v0.8h, v1.8h
 ; CHECK-SD-NEXT:    addp v0.8h, v0.8h, v0.8h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addp_v8i16:
@@ -178,6 +179,7 @@ define <8 x i8> @addp_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    add v0.16b, v0.16b, v1.16b
 ; CHECK-SD-NEXT:    addp v0.16b, v0.16b, v0.16b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addp_v16i8:

diff  --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
index c380648e64046c..c0d91c1e0c836b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -44,7 +44,9 @@ define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
 ; CHECK-LABEL: ins8bw:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[5], w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
   ret <8 x i8> %tmp3
@@ -53,7 +55,9 @@ define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
 ; CHECK-LABEL: ins4hw:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
   ret <4 x i16> %tmp3
@@ -62,7 +66,9 @@ define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
 ; CHECK-LABEL: ins2sw:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
   ret <2 x i32> %tmp3
@@ -144,12 +150,14 @@ define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
 ; CHECK-SD-LABEL: ins8b16:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v1.b[15], v0.b[2]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: ins8b16:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b2, v0.b[2]
 ; CHECK-GI-NEXT:    mov v0.16b, v1.16b
 ; CHECK-GI-NEXT:    mov v0.b[15], v2.b[0]
@@ -162,6 +170,7 @@ define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
 ; CHECK-LABEL: ins4h8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.h[7], v0.h[2]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -173,6 +182,7 @@ define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
 ; CHECK-LABEL: ins2s4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.s[1], v0.s[1]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -184,6 +194,7 @@ define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
 ; CHECK-LABEL: ins1d2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -195,6 +206,7 @@ define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
 ; CHECK-LABEL: ins2f4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.s[1], v0.s[1]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -206,6 +218,7 @@ define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
 ; CHECK-LABEL: ins1f2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -217,6 +230,7 @@ define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
 define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1) {
 ; CHECK-LABEL: ins1f2_args_flipped:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <1 x double> %tmp1, i32 0
@@ -227,6 +241,7 @@ define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1)
 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
 ; CHECK-SD-LABEL: ins16b8:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v1.b[7], v0.b[2]
 ; CHECK-SD-NEXT:    fmov d0, d1
 ; CHECK-SD-NEXT:    ret
@@ -236,6 +251,7 @@ define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
 ; CHECK-GI-NEXT:    mov b2, v0.b[2]
 ; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    mov v0.b[7], v2.b[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp3 = extractelement <16 x i8> %tmp1, i32 2
   %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
@@ -245,6 +261,7 @@ define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
 ; CHECK-LABEL: ins8h4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v1.h[3], v0.h[2]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -256,6 +273,7 @@ define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
 ; CHECK-LABEL: ins4s2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v1.s[1], v0.s[2]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -267,6 +285,7 @@ define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
 ; CHECK-LABEL: ins2d1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <2 x i64> %tmp1, i32 0
   %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
@@ -276,6 +295,7 @@ define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
 ; CHECK-LABEL: ins4f2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v1.s[1], v0.s[2]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -288,6 +308,7 @@ define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
 ; CHECK-SD-LABEL: ins2f1:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: ins2f1:
@@ -302,15 +323,19 @@ define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
 ; CHECK-SD-LABEL: ins8b8:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v1.b[4], v0.b[2]
 ; CHECK-SD-NEXT:    fmov d0, d1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: ins8b8:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b2, v0.b[2]
 ; CHECK-GI-NEXT:    fmov d0, d1
 ; CHECK-GI-NEXT:    mov v0.b[4], v2.b[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %tmp3 = extractelement <8 x i8> %tmp1, i32 2
   %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
@@ -320,6 +345,8 @@ define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
 ; CHECK-LABEL: ins4h4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.h[3], v0.h[2]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -331,6 +358,8 @@ define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
 ; CHECK-LABEL: ins2s2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -351,6 +380,8 @@ define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
 ; CHECK-LABEL: ins2f2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -421,6 +452,7 @@ define i64 @umovx2d(<2 x i64> %tmp1) {
 define i32 @umovw8b(<8 x i8> %tmp1) {
 ; CHECK-LABEL: umovw8b:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[7]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <8 x i8> %tmp1, i32 7
@@ -431,6 +463,7 @@ define i32 @umovw8b(<8 x i8> %tmp1) {
 define i32 @umovw4h(<4 x i16> %tmp1) {
 ; CHECK-LABEL: umovw4h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[2]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
@@ -441,11 +474,13 @@ define i32 @umovw4h(<4 x i16> %tmp1) {
 define i32 @umovw2s(<2 x i32> %tmp1) {
 ; CHECK-SD-LABEL: umovw2s:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w0, v0.s[1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: umovw2s:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
@@ -454,10 +489,16 @@ define i32 @umovw2s(<2 x i32> %tmp1) {
 }
 
 define i64 @umovx1d(<1 x i64> %tmp1) {
-; CHECK-LABEL: umovx1d:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: umovx1d:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    fmov x0, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: umovx1d:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    ret
   %tmp3 = extractelement <1 x i64> %tmp1, i32 0
   ret i64 %tmp3
 }
@@ -519,6 +560,7 @@ define i64 @smovx4s(<4 x i32> %tmp1) {
 define i32 @smovw8b(<8 x i8> %tmp1) {
 ; CHECK-LABEL: smovw8b:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v0.b[4]
 ; CHECK-NEXT:    add w0, w8, w8
 ; CHECK-NEXT:    ret
@@ -531,6 +573,7 @@ define i32 @smovw8b(<8 x i8> %tmp1) {
 define i32 @smovw4h(<4 x i16> %tmp1) {
 ; CHECK-LABEL: smovw4h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v0.h[2]
 ; CHECK-NEXT:    add w0, w8, w8
 ; CHECK-NEXT:    ret
@@ -543,6 +586,7 @@ define i32 @smovw4h(<4 x i16> %tmp1) {
 define i32 @smovx8b(<8 x i8> %tmp1) {
 ; CHECK-LABEL: smovx8b:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.b[6]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <8 x i8> %tmp1, i32 6
@@ -553,6 +597,7 @@ define i32 @smovx8b(<8 x i8> %tmp1) {
 define i32 @smovx4h(<4 x i16> %tmp1) {
 ; CHECK-LABEL: smovx4h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.h[2]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
@@ -563,6 +608,7 @@ define i32 @smovx4h(<4 x i16> %tmp1) {
 define i64 @smovx2s(<2 x i32> %tmp1) {
 ; CHECK-LABEL: smovx2s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov x0, v0.s[1]
 ; CHECK-NEXT:    ret
   %tmp3 = extractelement <2 x i32> %tmp1, i32 1
@@ -573,13 +619,19 @@ define i64 @smovx2s(<2 x i32> %tmp1) {
 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
 ; CHECK-SD-LABEL: test_vcopy_lane_s8:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.b[5], v1.b[3]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_vcopy_lane_s8:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b1, v1.b[3]
 ; CHECK-GI-NEXT:    mov v0.b[5], v1.b[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
   ret <8 x i8> %vset_lane
@@ -603,6 +655,8 @@ define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.b[7], v0.b[0]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -739,6 +793,7 @@ define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
 ; CHECK-LABEL: test_vdup_lane_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8b, v0.b[5]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -748,6 +803,7 @@ define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
 ; CHECK-LABEL: test_vdup_lane_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[2]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
@@ -757,6 +813,7 @@ define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
 ; CHECK-LABEL: test_vdup_lane_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
@@ -766,6 +823,7 @@ define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.16b, v0.b[5]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -775,6 +833,7 @@ define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[2]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
@@ -784,6 +843,7 @@ define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[1]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -793,6 +853,7 @@ define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
 ; CHECK-LABEL: test_vdupq_lane_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
@@ -808,6 +869,7 @@ define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
 ; CHECK-GI-LABEL: test_vdup_laneq_s8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    dup v0.16b, v0.b[5]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i8> %shuffle
@@ -822,6 +884,7 @@ define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
 ; CHECK-GI-LABEL: test_vdup_laneq_s16:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    dup v0.8h, v0.h[2]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
   ret <4 x i16> %shuffle
@@ -836,6 +899,7 @@ define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
 ; CHECK-GI-LABEL: test_vdup_laneq_s32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    dup v0.4s, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
   ret <2 x i32> %shuffle
@@ -1060,11 +1124,18 @@ define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
 }
 
 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov8i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf d0, d0
-; CHECK-NEXT:    neg v0.8b, v0.8b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_bitcastv1f64tov8i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    scvtf d0, d0
+; CHECK-SD-NEXT:    neg v0.8b, v0.8b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_bitcastv1f64tov8i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    scvtf d0, d0
+; CHECK-GI-NEXT:    neg v0.8b, v0.8b
+; CHECK-GI-NEXT:    ret
   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
   %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
   %sub.i = sub <8 x i8> zeroinitializer, %1
@@ -1072,11 +1143,18 @@ define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
 }
 
 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov4i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf d0, d0
-; CHECK-NEXT:    neg v0.4h, v0.4h
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_bitcastv1f64tov4i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    scvtf d0, d0
+; CHECK-SD-NEXT:    neg v0.4h, v0.4h
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_bitcastv1f64tov4i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    scvtf d0, d0
+; CHECK-GI-NEXT:    neg v0.4h, v0.4h
+; CHECK-GI-NEXT:    ret
   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
   %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
   %sub.i = sub <4 x i16> zeroinitializer, %1
@@ -1084,11 +1162,18 @@ define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
 }
 
 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
-; CHECK-LABEL: test_bitcastv1f64tov2i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    scvtf d0, d0
-; CHECK-NEXT:    neg v0.2s, v0.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_bitcastv1f64tov2i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    scvtf d0, d0
+; CHECK-SD-NEXT:    neg v0.2s, v0.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_bitcastv1f64tov2i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    scvtf d0, d0
+; CHECK-GI-NEXT:    neg v0.2s, v0.2s
+; CHECK-GI-NEXT:    ret
   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
   %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
   %sub.i = sub <2 x i32> zeroinitializer, %1
@@ -1098,6 +1183,7 @@ define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
 ; CHECK-SD-LABEL: test_bitcastv1f64tov1i64:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    scvtf d0, d0
 ; CHECK-SD-NEXT:    neg d0, d0
 ; CHECK-SD-NEXT:    ret
@@ -1118,6 +1204,7 @@ define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    scvtf d0, d0
 ; CHECK-NEXT:    fneg v0.2s, v0.2s
 ; CHECK-NEXT:    ret
@@ -1173,6 +1260,7 @@ define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
 ; CHECK-GI-LABEL: scalar_to_vector.v2i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov v0.s[0], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %b = insertelement <2 x i32> undef, i32 %a, i32 0
   ret <2 x i32> %b
@@ -1209,6 +1297,7 @@ define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
 ; CHECK-SD-LABEL: testDUP.v1i8:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1232,6 +1321,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
 ; CHECK-LABEL: testDUP.v1i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
   %b = extractelement <1 x i16> %a, i32 0
@@ -1249,6 +1339,7 @@ define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
 ; CHECK-LABEL: testDUP.v1i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    ret
   %b = extractelement <1 x i32> %a, i32 0
@@ -1262,6 +1353,7 @@ define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
 define <8 x i8> @getl(<16 x i8> %x) #0 {
 ; CHECK-SD-LABEL: getl:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: getl:
@@ -1308,6 +1400,7 @@ define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-SD-NEXT:    ldr h1, [x8]
@@ -1350,6 +1443,7 @@ define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-SD-NEXT:    str h0, [x8]
 ; CHECK-SD-NEXT:    ldr d1, [sp, #8]
@@ -1390,11 +1484,13 @@ define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
 ; CHECK-SD-LABEL: test_dup_v2i32_v4i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.4h, v0.h[2]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_dup_v2i32_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, v0.s[1]
 ; CHECK-GI-NEXT:    dup v0.4h, w8
 ; CHECK-GI-NEXT:    ret
@@ -1436,6 +1532,7 @@ entry:
 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
 ; CHECK-SD-LABEL: test_dup_v1i64_v4i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1457,6 +1554,7 @@ entry:
 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
 ; CHECK-SD-LABEL: test_dup_v1i64_v2i32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1613,12 +1711,15 @@ declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
 ; CHECK-SD-LABEL: test_concat_undef_v1i32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_undef_v1i32:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov v0.s[1], v0.s[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %0 = extractelement <2 x i32> %a, i32 0
@@ -1643,6 +1744,7 @@ entry:
 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1661,6 +1763,7 @@ define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
 ; CHECK-NEXT:    sqabs s0, s1
 ; CHECK-NEXT:    fmov w8, s2
 ; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
@@ -1680,7 +1783,9 @@ define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
 ; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v16i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI126_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI126_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1691,12 +1796,14 @@ entry:
 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v16i8_v8i8_v16i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b3, v0.b[1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI127_0
 ; CHECK-GI-NEXT:    mov v1.b[0], v0.b[0]
@@ -1740,6 +1847,7 @@ entry:
 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v16i8_v16i8_v8i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1747,6 +1855,7 @@ define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov b3, v0.b[1]
 ; CHECK-GI-NEXT:    mov v2.b[0], v0.b[0]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov b4, v0.b[2]
 ; CHECK-GI-NEXT:    mov v2.b[1], v3.b[0]
 ; CHECK-GI-NEXT:    mov b3, v0.b[3]
@@ -1816,13 +1925,17 @@ entry:
 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v16i8_v8i8_v8i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v8i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov b3, v0.b[1]
 ; CHECK-GI-NEXT:    mov v2.b[0], v0.b[0]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov b4, v0.b[2]
 ; CHECK-GI-NEXT:    mov v2.b[1], v3.b[0]
 ; CHECK-GI-NEXT:    mov b3, v0.b[3]
@@ -1898,7 +2011,9 @@ define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
 ; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI130_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI130_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1909,14 +2024,16 @@ entry:
 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v8i16_v4i16_v8i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.16b, v1.16b
-; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI131_0
+; CHECK-GI-NEXT:    mov v1.h[0], v0.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[1], v0.h[1]
 ; CHECK-GI-NEXT:    mov v1.h[2], v0.h[2]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[3]
@@ -1939,12 +2056,14 @@ entry:
 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v8i16_v8i16_v4i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.h[0], v0.h[0]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v2.h[1], v0.h[1]
 ; CHECK-GI-NEXT:    mov v2.h[2], v0.h[2]
 ; CHECK-GI-NEXT:    mov v2.h[3], v0.h[3]
@@ -1977,12 +2096,16 @@ entry:
 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v8i16_v4i16_v4i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v4i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov v2.h[0], v0.h[0]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v2.h[1], v0.h[1]
 ; CHECK-GI-NEXT:    mov v2.h[2], v0.h[2]
 ; CHECK-GI-NEXT:    mov v2.h[3], v0.h[3]
@@ -2021,7 +2144,9 @@ define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
 ; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v4i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI134_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI134_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -2032,14 +2157,16 @@ entry:
 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v4i32_v2i32_v4i32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.16b, v1.16b
-; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI135_0
+; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
 ; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI135_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
@@ -2056,12 +2183,14 @@ entry:
 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v4i32_v4i32_v2i32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v2i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.s[0], v0.s[0]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v2.s[1], v0.s[1]
 ; CHECK-GI-NEXT:    mov v2.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    mov v2.s[3], v1.s[1]
@@ -2082,6 +2211,8 @@ entry:
 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2102,6 +2233,7 @@ entry:
 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -2114,12 +2246,14 @@ entry:
 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
 ; CHECK-SD-LABEL: test_concat_v2i64_v2i64_v1i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_concat_v2i64_v2i64_v1i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -2133,6 +2267,8 @@ entry:
 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2210,6 +2346,7 @@ define <16 x i8> @concat_vector_v16i8_const() {
 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
 ; CHECK-LABEL: concat_vector_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
@@ -2219,6 +2356,7 @@ define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
 ; CHECK-LABEL: concat_vector_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    ret
  %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
@@ -2228,6 +2366,7 @@ define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
 ; CHECK-SD-LABEL: concat_vector_v8i8:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -2243,6 +2382,7 @@ define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
 ; CHECK-LABEL: concat_vector_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
@@ -2252,6 +2392,7 @@ define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
 ; CHECK-SD-LABEL: concat_vector_v16i8:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.16b, v0.b[0]
 ; CHECK-SD-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
index d04bac78377bfb..6327679756739a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll
@@ -7,11 +7,12 @@ define <4 x i32> @copyTuple.QPair(ptr %a, ptr %b) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v3.4s, #2
 ; CHECK-NEXT:    movi v2.2d, #0xffffffffffffffff
-; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    mov v1.16b, v3.16b
+; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ld2 { v0.s, v1.s }[1], [x0]
 ; CHECK-NEXT:    mov v1.16b, v2.16b
 ; CHECK-NEXT:    ld2 { v0.s, v1.s }[1], [x1]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1
 ; CHECK-NEXT:    ret
 entry:
   %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>, i64 1, ptr %a)
@@ -24,16 +25,17 @@ entry:
 define <4 x i32> @copyTuple.QTriple(ptr %a, ptr %b, <4 x i32> %c) {
 ; CHECK-LABEL: copyTuple.QTriple:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $q31_q0_q1
 ; CHECK-NEXT:    movi v31.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    mov v1.16b, v0.16b
-; CHECK-NEXT:    mov v2.16b, v0.16b
-; CHECK-NEXT:    mov v3.16b, v1.16b
-; CHECK-NEXT:    mov v1.16b, v31.16b
-; CHECK-NEXT:    ld3 { v1.s, v2.s, v3.s }[1], [x0]
 ; CHECK-NEXT:    mov v2.16b, v31.16b
 ; CHECK-NEXT:    mov v3.16b, v0.16b
-; CHECK-NEXT:    ld3 { v1.s, v2.s, v3.s }[1], [x1]
-; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v4.16b, v1.16b
+; CHECK-NEXT:    ld3 { v2.s, v3.s, v4.s }[1], [x0]
+; CHECK-NEXT:    mov v3.16b, v31.16b
+; CHECK-NEXT:    mov v4.16b, v0.16b
+; CHECK-NEXT:    ld3 { v2.s, v3.s, v4.s }[1], [x1]
+; CHECK-NEXT:    mov v0.16b, v2.16b
 ; CHECK-NEXT:    ret
 entry:
   %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a)
@@ -46,19 +48,20 @@ entry:
 define <4 x i32> @copyTuple.QQuad(ptr %a, ptr %b, <4 x i32> %c) {
 ; CHECK-LABEL: copyTuple.QQuad:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $q31_q0_q1_q2
 ; CHECK-NEXT:    movi v31.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    mov v1.16b, v0.16b
 ; CHECK-NEXT:    mov v2.16b, v0.16b
-; CHECK-NEXT:    mov v4.16b, v2.16b
-; CHECK-NEXT:    mov v3.16b, v1.16b
-; CHECK-NEXT:    mov v2.16b, v0.16b
-; CHECK-NEXT:    mov v1.16b, v31.16b
-; CHECK-NEXT:    ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x0]
-; CHECK-NEXT:    mov v2.16b, v31.16b
-; CHECK-NEXT:    mov v3.16b, v0.16b
+; CHECK-NEXT:    mov v3.16b, v31.16b
 ; CHECK-NEXT:    mov v4.16b, v0.16b
-; CHECK-NEXT:    ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x1]
-; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v5.16b, v1.16b
+; CHECK-NEXT:    mov v6.16b, v2.16b
+; CHECK-NEXT:    ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x0]
+; CHECK-NEXT:    mov v4.16b, v31.16b
+; CHECK-NEXT:    mov v5.16b, v0.16b
+; CHECK-NEXT:    mov v6.16b, v0.16b
+; CHECK-NEXT:    ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x1]
+; CHECK-NEXT:    mov v0.16b, v3.16b
 ; CHECK-NEXT:    ret
 entry:
   %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a)

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
index 8f6cd8db84257f..ecf3f69825c0e9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
@@ -59,6 +59,8 @@ define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
 define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: mul1xi64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mul x8, x9, x8
@@ -140,6 +142,8 @@ define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
 define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
 ; CHECK-LABEL: sdiv1x8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.b[0]
 ; CHECK-NEXT:    smov w9, v0.b[0]
 ; CHECK-NEXT:    sdiv w8, w9, w8
@@ -152,6 +156,8 @@ define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
 define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: sdiv8x8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.b[1]
 ; CHECK-NEXT:    smov w9, v0.b[1]
 ; CHECK-NEXT:    smov w10, v0.b[0]
@@ -266,6 +272,8 @@ define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) {
 define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
 ; CHECK-LABEL: sdiv1x16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.h[0]
 ; CHECK-NEXT:    smov w9, v0.h[0]
 ; CHECK-NEXT:    sdiv w8, w9, w8
@@ -278,6 +286,8 @@ define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
 define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: sdiv4x16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.h[1]
 ; CHECK-NEXT:    smov w9, v0.h[1]
 ; CHECK-NEXT:    smov w10, v0.h[0]
@@ -294,6 +304,7 @@ define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-NEXT:    sdiv w8, w12, w11
 ; CHECK-NEXT:    mov v0.h[2], w10
 ; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = sdiv <4 x i16> %A, %B;
 	ret <4 x i16> %tmp3
@@ -343,6 +354,8 @@ define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) {
 define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
 ; CHECK-LABEL: sdiv1x32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    sdiv w8, w9, w8
@@ -355,6 +368,8 @@ define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
 define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: sdiv2x32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w10, v0.s[1]
@@ -363,6 +378,7 @@ define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-NEXT:    sdiv w9, w10, w9
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w9
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = sdiv <2 x i32> %A, %B;
 	ret <2 x i32> %tmp3
@@ -395,6 +411,8 @@ define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) {
 define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: sdiv1x64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    sdiv x8, x9, x8
@@ -423,6 +441,8 @@ define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) {
 define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
 ; CHECK-LABEL: udiv1x8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.b[0]
 ; CHECK-NEXT:    umov w9, v0.b[0]
 ; CHECK-NEXT:    udiv w8, w9, w8
@@ -435,6 +455,8 @@ define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
 define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: udiv8x8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.b[1]
 ; CHECK-NEXT:    umov w9, v0.b[1]
 ; CHECK-NEXT:    umov w10, v0.b[0]
@@ -549,6 +571,8 @@ define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) {
 define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
 ; CHECK-LABEL: udiv1x16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.h[0]
 ; CHECK-NEXT:    umov w9, v0.h[0]
 ; CHECK-NEXT:    udiv w8, w9, w8
@@ -561,6 +585,8 @@ define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
 define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: udiv4x16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.h[1]
 ; CHECK-NEXT:    umov w9, v0.h[1]
 ; CHECK-NEXT:    umov w10, v0.h[0]
@@ -577,6 +603,7 @@ define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-NEXT:    udiv w8, w12, w11
 ; CHECK-NEXT:    mov v0.h[2], w10
 ; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = udiv <4 x i16> %A, %B;
 	ret <4 x i16> %tmp3
@@ -626,6 +653,8 @@ define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) {
 define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
 ; CHECK-LABEL: udiv1x32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    udiv w8, w9, w8
@@ -638,6 +667,8 @@ define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
 define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: udiv2x32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w10, v0.s[1]
@@ -646,6 +677,7 @@ define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-NEXT:    udiv w9, w10, w9
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w9
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = udiv <2 x i32> %A, %B;
 	ret <2 x i32> %tmp3
@@ -678,6 +710,8 @@ define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) {
 define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: udiv1x64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    udiv x8, x9, x8
@@ -706,6 +740,8 @@ define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) {
 define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
 ; CHECK-LABEL: srem1x8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.b[0]
 ; CHECK-NEXT:    smov w9, v0.b[0]
 ; CHECK-NEXT:    sdiv w10, w9, w8
@@ -719,6 +755,8 @@ define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
 define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: srem8x8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w11, v1.b[0]
 ; CHECK-NEXT:    smov w12, v0.b[0]
 ; CHECK-NEXT:    smov w8, v1.b[1]
@@ -878,6 +916,8 @@ define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
 define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
 ; CHECK-LABEL: srem1x16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v1.h[0]
 ; CHECK-NEXT:    smov w9, v0.h[0]
 ; CHECK-NEXT:    sdiv w10, w9, w8
@@ -891,6 +931,8 @@ define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
 define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: srem4x16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w11, v1.h[0]
 ; CHECK-NEXT:    smov w12, v0.h[0]
 ; CHECK-NEXT:    smov w8, v1.h[1]
@@ -911,6 +953,7 @@ define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-NEXT:    mov v0.h[2], w8
 ; CHECK-NEXT:    msub w8, w9, w17, w18
 ; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = srem <4 x i16> %A, %B;
 	ret <4 x i16> %tmp3
@@ -968,6 +1011,8 @@ define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) {
 define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
 ; CHECK-LABEL: srem1x32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    sdiv w10, w9, w8
@@ -981,6 +1026,8 @@ define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
 define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: srem2x32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w11, v1.s[1]
@@ -991,6 +1038,7 @@ define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    msub w9, w13, w11, w12
 ; CHECK-NEXT:    mov v0.s[1], w9
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = srem <2 x i32> %A, %B;
 	ret <2 x i32> %tmp3
@@ -1027,6 +1075,8 @@ define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) {
 define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: srem1x64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    sdiv x10, x9, x8
@@ -1058,6 +1108,8 @@ define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
 define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
 ; CHECK-LABEL: urem1x8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.b[0]
 ; CHECK-NEXT:    umov w9, v0.b[0]
 ; CHECK-NEXT:    udiv w10, w9, w8
@@ -1071,6 +1123,8 @@ define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
 define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: urem8x8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w11, v1.b[0]
 ; CHECK-NEXT:    umov w12, v0.b[0]
 ; CHECK-NEXT:    umov w8, v1.b[1]
@@ -1230,6 +1284,8 @@ define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) {
 define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) {
 ; CHECK-LABEL: urem1x16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v1.h[0]
 ; CHECK-NEXT:    umov w9, v0.h[0]
 ; CHECK-NEXT:    udiv w10, w9, w8
@@ -1243,6 +1299,8 @@ define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) {
 define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: urem4x16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w11, v1.h[0]
 ; CHECK-NEXT:    umov w12, v0.h[0]
 ; CHECK-NEXT:    umov w8, v1.h[1]
@@ -1263,6 +1321,7 @@ define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-NEXT:    mov v0.h[2], w8
 ; CHECK-NEXT:    msub w8, w9, w17, w18
 ; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = urem <4 x i16> %A, %B;
 	ret <4 x i16> %tmp3
@@ -1320,6 +1379,8 @@ define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) {
 define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
 ; CHECK-LABEL: urem1x32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    udiv w10, w9, w8
@@ -1333,6 +1394,8 @@ define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) {
 define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: urem2x32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w11, v1.s[1]
@@ -1343,6 +1406,7 @@ define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    msub w9, w13, w11, w12
 ; CHECK-NEXT:    mov v0.s[1], w9
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 	%tmp3 = urem <2 x i32> %A, %B;
 	ret <2 x i32> %tmp3
@@ -1379,6 +1443,8 @@ define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) {
 define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) {
 ; CHECK-LABEL: urem1x64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    udiv x10, x9, x8
@@ -1414,16 +1480,23 @@ define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) {
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    mov s1, v1.s[1]
 ; CHECK-NEXT:    bl fmodf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
 	%tmp3 = frem <2 x float> %A, %B;
@@ -1441,10 +1514,14 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    mov s1, v1.s[1]
 ; CHECK-NEXT:    bl fmodf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -1452,6 +1529,7 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
 ; CHECK-NEXT:    mov s1, v1.s[2]
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -1459,6 +1537,7 @@ define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) {
 ; CHECK-NEXT:    mov s1, v1.s[3]
 ; CHECK-NEXT:    bl fmodf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -1492,10 +1571,14 @@ define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) {
 ; CHECK-NEXT:    mov d0, v0.d[1]
 ; CHECK-NEXT:    mov d1, v1.d[1]
 ; CHECK-NEXT:    bl fmod
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT:    bl fmod
 ; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    add sp, sp, #64

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll b/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
index 4c8d9691b30181..091cda89bfe403 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-scalar-by-elem-mul.ll
@@ -4,6 +4,7 @@
 define float @test_fmul_lane_ss2S_0(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss2S_0:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul s0, s0, s1
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -14,6 +15,7 @@ define float @test_fmul_lane_ss2S_0(float %a, <2 x float> %v) {
 define float @test_fmul_lane_ss2S_1(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss2S_1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul s0, s0, v1.s[1]
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 1
@@ -24,6 +26,7 @@ define float @test_fmul_lane_ss2S_1(float %a, <2 x float> %v) {
 define float @test_fmul_lane_ss2S_1_swap(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmul_lane_ss2S_1_swap:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul s0, s0, v1.s[1]
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 1
@@ -109,6 +112,7 @@ declare float @llvm.aarch64.neon.fmulx.f32(float, float)
 define float @test_fmulx_lane_f32_0(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmulx_lane_f32_0:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx s0, s0, s1
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -119,6 +123,7 @@ define float @test_fmulx_lane_f32_0(float %a, <2 x float> %v) {
 define float @test_fmulx_lane_f32_1(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmulx_lane_f32_1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx s0, s0, v1.s[1]
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 1
@@ -202,6 +207,7 @@ define double @test_fmulx_laneq_f64_1_swap(double %a, <2 x double> %v) {
 define float @test_fmulx_horizontal_f32(<2 x float> %v) {
 ; CHECK-LABEL: test_fmulx_horizontal_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmulx s0, s0, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
index faa537fab3cc3d..cad3fb58086d69 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -19,6 +19,8 @@ define <8x i8> @test_select_cc_v8i8_i8(i8 %a, i8 %b, <8x i8> %c, <8x i8> %d ) {
 define <8x i8> @test_select_cc_v8i8_f32(float %a, float %b, <8x i8> %c, <8x i8> %d ) {
 ; CHECK-LABEL: test_select_cc_v8i8_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $d1
 ; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
@@ -56,6 +58,8 @@ define <16x i8> @test_select_cc_v16i8_i8(i8 %a, i8 %b, <16x i8> %c, <16x i8> %d
 define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x i8> %d ) {
 ; CHECK-LABEL: test_select_cc_v16i8_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
@@ -68,6 +72,8 @@ define <16x i8> @test_select_cc_v16i8_f32(float %a, float %b, <16x i8> %c, <16x
 define <16x i8> @test_select_cc_v16i8_f64(double %a, double %b, <16x i8> %c, <16x i8> %d ) {
 ; CHECK-LABEL: test_select_cc_v16i8_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
@@ -163,6 +169,8 @@ define <2x i64> @test_select_cc_v2i64(i64 %a, i64 %b, <2x i64> %c, <2x i64> %d )
 define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1 x float> %d ) {
 ; CHECK-LABEL: test_select_cc_v1f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $d1
 ; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
 ; CHECK-NEXT:    ret
@@ -174,6 +182,8 @@ define <1 x float> @test_select_cc_v1f32(float %a, float %b, <1 x float> %c, <1
 define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d ) {
 ; CHECK-LABEL: test_select_cc_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $d1
 ; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    bsl v0.8b, v2.8b, v3.8b
@@ -186,6 +196,8 @@ define <2 x float> @test_select_cc_v2f32(float %a, float %b, <2 x float> %c, <2
 define <4x float> @test_select_cc_v4f32(float %a, float %b, <4x float> %c, <4x float> %d ) {
 ; CHECK-LABEL: test_select_cc_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
@@ -236,6 +248,8 @@ define <1 x double> @test_select_cc_v1f64_icmp(i64 %a, i64 %b, <1 x double> %c,
 define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d ) {
 ; CHECK-LABEL: test_select_cc_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fcmeq v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    bsl v0.16b, v2.16b, v3.16b
@@ -265,6 +279,8 @@ define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b
 define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
 ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-NEXT:    fcmeq v2.4s, v2.4s, v3.4s
 ; CHECK-NEXT:    dup v2.4s, v2.s[0]
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
@@ -277,6 +293,8 @@ define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b
 define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
 ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-NEXT:    fcmeq v2.2d, v2.2d, v3.2d
 ; CHECK-NEXT:    dup v2.2d, v2.d[0]
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
index 1036dd28dddb3a..7d87be0ce8e1ca 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
@@ -403,13 +403,17 @@ entry:
 define <8 x i8> @test_vld1_lane_s8(ptr %a, <8 x i8> %b) {
 ; CHECK-GI-LABEL: test_vld1_lane_s8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ld1 { v0.b }[7], [x0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ;
 ; CHECK-SD-LABEL: test_vld1_lane_s8:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ldr b1, [x0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.b[7], v1.b[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 entry:
   %0 = load i8, ptr %a, align 1
@@ -420,7 +424,9 @@ entry:
 define <4 x i16> @test_vld1_lane_s16(ptr %a, <4 x i16> %b) {
 ; CHECK-LABEL: test_vld1_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1 { v0.h }[3], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i16, ptr %a, align 2
@@ -431,7 +437,9 @@ entry:
 define <2 x i32> @test_vld1_lane_s32(ptr %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vld1_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1 { v0.s }[1], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr %a, align 4
@@ -453,7 +461,9 @@ entry:
 define <2 x float> @test_vld1_lane_f32(ptr %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vld1_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ld1 { v0.s }[1], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load float, ptr %a, align 4
@@ -596,6 +606,7 @@ entry:
 define void @test_vst1_lane_s8(ptr %a, <8 x i8> %b) {
 ; CHECK-LABEL: test_vst1_lane_s8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    st1 { v0.b }[7], [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -607,6 +618,7 @@ entry:
 define void @test_vst1_lane_s16(ptr %a, <4 x i16> %b) {
 ; CHECK-LABEL: test_vst1_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    st1 { v0.h }[3], [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -616,10 +628,16 @@ entry:
 }
 
 define void @test_vst1_lane0_s16(ptr %a, <4 x i16> %b) {
-; CHECK-LABEL: test_vst1_lane0_s16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str h0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-GI-LABEL: test_vst1_lane0_s16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    str h0, [x0]
+; CHECK-GI-NEXT:    ret
+;
+; CHECK-SD-LABEL: test_vst1_lane0_s16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str h0, [x0]
+; CHECK-SD-NEXT:    ret
 entry:
   %0 = extractelement <4 x i16> %b, i32 0
   store i16 %0, ptr %a, align 2
@@ -629,6 +647,7 @@ entry:
 define void @test_vst1_lane_s32(ptr %a, <2 x i32> %b) {
 ; CHECK-LABEL: test_vst1_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    st1 { v0.s }[1], [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -638,10 +657,16 @@ entry:
 }
 
 define void @test_vst1_lane0_s32(ptr %a, <2 x i32> %b) {
-; CHECK-LABEL: test_vst1_lane0_s32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str s0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-GI-LABEL: test_vst1_lane0_s32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    str s0, [x0]
+; CHECK-GI-NEXT:    ret
+;
+; CHECK-SD-LABEL: test_vst1_lane0_s32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str s0, [x0]
+; CHECK-SD-NEXT:    ret
 entry:
   %0 = extractelement <2 x i32> %b, i32 0
   store i32 %0, ptr %a, align 4
@@ -649,10 +674,16 @@ entry:
 }
 
 define void @test_vst1_lane_s64(ptr %a, <1 x i64> %b) {
-; CHECK-LABEL: test_vst1_lane_s64:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str d0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-GI-LABEL: test_vst1_lane_s64:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    str d0, [x0]
+; CHECK-GI-NEXT:    ret
+;
+; CHECK-SD-LABEL: test_vst1_lane_s64:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str d0, [x0]
+; CHECK-SD-NEXT:    ret
 entry:
   %0 = extractelement <1 x i64> %b, i32 0
   store i64 %0, ptr %a, align 8
@@ -662,6 +693,7 @@ entry:
 define void @test_vst1_lane_f32(ptr %a, <2 x float> %b) {
 ; CHECK-LABEL: test_vst1_lane_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    st1 { v0.s }[1], [x0]
 ; CHECK-NEXT:    ret
 entry:
@@ -671,10 +703,16 @@ entry:
 }
 
 define void @test_vst1_lane0_f32(ptr %a, <2 x float> %b) {
-; CHECK-LABEL: test_vst1_lane0_f32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str s0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-GI-LABEL: test_vst1_lane0_f32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    str s0, [x0]
+; CHECK-GI-NEXT:    ret
+;
+; CHECK-SD-LABEL: test_vst1_lane0_f32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    str s0, [x0]
+; CHECK-SD-NEXT:    ret
 entry:
   %0 = extractelement <2 x float> %b, i32 0
   store float %0, ptr %a, align 4

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
index 7bc3edf916719f..c9a03c888d5b25 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-shift.ll
@@ -330,6 +330,7 @@ define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
 define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -344,6 +345,7 @@ define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
@@ -358,6 +360,7 @@ define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -372,6 +375,7 @@ define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -386,6 +390,7 @@ define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_u32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
@@ -400,6 +405,7 @@ define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vshrn_high_n_u64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    shrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -414,6 +420,7 @@ define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqshrun_high_n_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrun2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
@@ -427,6 +434,7 @@ define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqshrun_high_n_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrun2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
@@ -440,6 +448,7 @@ define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqshrun_high_n_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrun2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -453,6 +462,7 @@ define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vrshrn_high_n_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
@@ -466,6 +476,7 @@ define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vrshrn_high_n_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
@@ -479,6 +490,7 @@ define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vrshrn_high_n_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -492,6 +504,7 @@ define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqrshrun_high_n_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrun2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
@@ -505,6 +518,7 @@ define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqrshrun_high_n_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrun2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
@@ -518,6 +532,7 @@ define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqrshrun_high_n_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrun2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -531,6 +546,7 @@ define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
@@ -544,6 +560,7 @@ define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
@@ -557,6 +574,7 @@ define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -570,6 +588,7 @@ define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
@@ -583,6 +602,7 @@ define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_u32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
@@ -596,6 +616,7 @@ define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqshrn_high_n_u64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -609,6 +630,7 @@ define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
@@ -622,6 +644,7 @@ define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
@@ -635,6 +658,7 @@ define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>
@@ -648,6 +672,7 @@ define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
 define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqrshrn2 v0.16b, v1.8h, #3
 ; CHECK-NEXT:    ret
   %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
@@ -661,6 +686,7 @@ define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
 define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_u32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqrshrn2 v0.8h, v1.4s, #9
 ; CHECK-NEXT:    ret
   %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
@@ -674,6 +700,7 @@ define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
 define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vqrshrn_high_n_u64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqrshrn2 v0.4s, v1.2d, #19
 ; CHECK-NEXT:    ret
   %1 = bitcast <2 x i32> %a to <1 x i64>

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
index a5990956852af7..96d5b5ef85415e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-vget.ll
@@ -5,6 +5,7 @@ define <8 x i8> @test_vget_high_s8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_high_s8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -15,6 +16,7 @@ define <4 x i16> @test_vget_high_s16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_high_s16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -25,6 +27,7 @@ define <2 x i32> @test_vget_high_s32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vget_high_s32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -35,6 +38,7 @@ define <1 x i64> @test_vget_high_s64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_high_s64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
@@ -45,6 +49,7 @@ define <8 x i8> @test_vget_high_u8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_high_u8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -55,6 +60,7 @@ define <4 x i16> @test_vget_high_u16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_high_u16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -65,6 +71,7 @@ define <2 x i32> @test_vget_high_u32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vget_high_u32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -75,6 +82,7 @@ define <1 x i64> @test_vget_high_u64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_high_u64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
@@ -85,6 +93,7 @@ define <1 x i64> @test_vget_high_p64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_high_p64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> <i32 1>
@@ -95,6 +104,7 @@ define <4 x i16> @test_vget_high_f16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_high_f16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -105,6 +115,7 @@ define <2 x float> @test_vget_high_f32(<4 x float> %a) {
 ; CHECK-LABEL: test_vget_high_f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 2, i32 3>
@@ -115,6 +126,7 @@ define <8 x i8> @test_vget_high_p8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_high_p8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -125,6 +137,7 @@ define <4 x i16> @test_vget_high_p16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_high_p16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -135,6 +148,7 @@ define <1 x double> @test_vget_high_f64(<2 x double> %a) {
 ; CHECK-LABEL: test_vget_high_f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> <i32 1>
@@ -144,6 +158,7 @@ entry:
 define <8 x i8> @test_vget_low_s8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_low_s8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -153,6 +168,7 @@ entry:
 define <4 x i16> @test_vget_low_s16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_low_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -162,6 +178,7 @@ entry:
 define <2 x i32> @test_vget_low_s32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vget_low_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
@@ -171,6 +188,7 @@ entry:
 define <1 x i64> @test_vget_low_s64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_low_s64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
@@ -180,6 +198,7 @@ entry:
 define <8 x i8> @test_vget_low_u8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_low_u8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -189,6 +208,7 @@ entry:
 define <4 x i16> @test_vget_low_u16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_low_u16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -198,6 +218,7 @@ entry:
 define <2 x i32> @test_vget_low_u32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vget_low_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
@@ -207,6 +228,7 @@ entry:
 define <1 x i64> @test_vget_low_u64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_low_u64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
@@ -216,6 +238,7 @@ entry:
 define <1 x i64> @test_vget_low_p64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vget_low_p64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x i64> %a, <2 x i64> undef, <1 x i32> zeroinitializer
@@ -225,6 +248,7 @@ entry:
 define <4 x i16> @test_vget_low_f16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_low_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -234,6 +258,7 @@ entry:
 define <2 x float> @test_vget_low_f32(<4 x float> %a) {
 ; CHECK-LABEL: test_vget_low_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
@@ -243,6 +268,7 @@ entry:
 define <8 x i8> @test_vget_low_p8(<16 x i8> %a) {
 ; CHECK-LABEL: test_vget_low_p8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -252,6 +278,7 @@ entry:
 define <4 x i16> @test_vget_low_p16(<8 x i16> %a) {
 ; CHECK-LABEL: test_vget_low_p16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -261,6 +288,7 @@ entry:
 define <1 x double> @test_vget_low_f64(<2 x double> %a) {
 ; CHECK-LABEL: test_vget_low_f64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <2 x double> %a, <2 x double> undef, <1 x i32> zeroinitializer

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
index eda4e4f743f6e7..6c70d19a977a53 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
@@ -8,6 +8,8 @@
 define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
 ; CHECK-LABEL: test_sext_extr_cmp_0:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    cmp x9, x8
@@ -86,6 +88,8 @@ define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
 define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
 ; CHECK-LABEL: test_br_extr_cmp:
 ; CHECK:       // %bb.0: // %common.ret
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    cmp x9, x8
@@ -106,6 +110,7 @@ if.end:
 define <1 x float> @test_vselect_f32(<1 x float> %i105, <1 x float> %in) {
 ; CHECK-LABEL: test_vselect_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fcmp s0, s0
 ; CHECK-NEXT:    cset w8, vs
 ; CHECK-NEXT:    fmov s2, w8
@@ -121,9 +126,12 @@ define <1 x float> @test_vselect_f32(<1 x float> %i105, <1 x float> %in) {
 define <1 x half> @test_vselect_f16(<1 x half> %i105, <1 x half> %in) {
 ; CHECK-LABEL: test_vselect_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fcvt s2, h0
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    fcmp s2, s2
 ; CHECK-NEXT:    fcsel s0, s1, s0, vs
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %i179 = fcmp uno <1 x half> %i105, zeroinitializer
   %i180 = select <1 x i1> %i179, <1 x half> %in, <1 x half> %i105
@@ -135,8 +143,11 @@ define <1 x half> @test_select_f16(half %a, half %b, <1 x half> %c, <1 x half> %
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvt s1, h1
 ; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    // kill: def $h3 killed $h3 def $s3
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    fcsel s0, s2, s3, eq
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %cmp31 = fcmp oeq half %a, %b
   %e = select i1 %cmp31, <1 x half> %c, <1 x half> %d

diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
index 8d141b5995510f..3d52a2c044a0cb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -134,6 +134,7 @@ define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %acc, <4 x i32> %mhs, <4 x i32>
 define <4 x i16> @test_sqrdmlah_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
 ; CHECK-LABEL: test_sqrdmlah_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqrdmulh v1.4h, v1.4h, v2.h[3]
 ; CHECK-NEXT:    sqadd v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    ret
@@ -160,6 +161,7 @@ entry:
 define <2 x i32> @test_sqrdmlah_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
 ; CHECK-LABEL: test_sqrdmlah_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqrdmulh v1.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    sqadd v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    ret
@@ -186,6 +188,7 @@ entry:
 define <4 x i16> @test_sqrdmlsh_lane_s16(<4 x i16> %acc, <4 x i16> %x, <4 x i16> %v) {
 ; CHECK-LABEL: test_sqrdmlsh_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqrdmulh v1.4h, v1.4h, v2.h[3]
 ; CHECK-NEXT:    sqsub v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    ret
@@ -212,6 +215,7 @@ entry:
 define <2 x i32> @test_sqrdmlsh_lane_s32(<2 x i32> %acc, <2 x i32> %x, <2 x i32> %v) {
 ; CHECK-LABEL: test_sqrdmlsh_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sqrdmulh v1.2s, v1.2s, v2.s[1]
 ; CHECK-NEXT:    sqsub v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    ret
@@ -243,6 +247,7 @@ entry:
 define i16 @test_sqrdmlah_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) {
 ; CHECK-LABEL: test_sqrdmlah_extracted_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[1]
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    sqadd v0.4h, v1.4h, v0.4h
@@ -277,6 +282,7 @@ entry:
 define i32 @test_sqrdmlah_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) {
 ; CHECK-LABEL: test_sqrdmlah_extracted_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    sqadd s0, s1, s0
@@ -309,6 +315,7 @@ entry:
 define i16 @test_sqrdmlsh_extracted_lane_s16(i16 %acc,<4 x i16> %x, <4 x i16> %v) {
 ; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.4h, v0.4h, v1.h[1]
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    sqsub v0.4h, v1.4h, v0.4h
@@ -343,6 +350,7 @@ entry:
 define i32 @test_sqrdmlsh_extracted_lane_s32(i32 %acc,<2 x i32> %x, <2 x i32> %v) {
 ; CHECK-LABEL: test_sqrdmlsh_extracted_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh v0.2s, v0.2s, v1.s[0]
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    sqsub s0, s1, s0
@@ -492,6 +500,7 @@ define i16 @test_sqrdmlah_extract_i16(i16 %acc, i16 %x, <4 x i16> %y_vec) {
 ; CHECK-LABEL: test_sqrdmlah_extract_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s1, w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrdmulh v0.4h, v1.4h, v0.h[1]
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    sqadd v0.4h, v1.4h, v0.4h
@@ -639,6 +648,7 @@ define i16 @test_vqrdmlahh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    fmov s2, w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrdmlah v1.4h, v2.4h, v0.h[3]
 ; CHECK-NEXT:    umov w0, v1.h[0]
 ; CHECK-NEXT:    ret
@@ -656,6 +666,7 @@ define i32 @test_vqrdmlahs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    fmov s2, w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrdmlah s1, s2, v0.s[1]
 ; CHECK-NEXT:    fmov w0, s1
 ; CHECK-NEXT:    ret
@@ -777,6 +788,7 @@ define i16 @test_vqrdmlshh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    fmov s2, w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrdmlsh v1.4h, v2.4h, v0.h[3]
 ; CHECK-NEXT:    umov w0, v1.h[0]
 ; CHECK-NEXT:    ret
@@ -794,6 +806,7 @@ define i32 @test_vqrdmlshs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov s1, w0
 ; CHECK-NEXT:    fmov s2, w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqrdmlsh s1, s2, v0.s[1]
 ; CHECK-NEXT:    fmov w0, s1
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
index 141d8d884f3374..80314765abdd1f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll
@@ -8,6 +8,7 @@ define void @test(ptr %p1, i32 %v1) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    fmov.2d v0, #2.00000000
 ; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    ldr s0, [x8]
@@ -27,6 +28,7 @@ define void @test2(ptr %p1, i32 %v1) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    movi.16b v0, #63
 ; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    ldr s0, [x8]
@@ -76,6 +78,7 @@ define <4 x float> @testv4i16(<2 x float> %l1) {
 ; CHECK-LABEL: testv4i16:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    movi.4h v1, #16
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov.d v1[1], v0[0]
 ; CHECK-NEXT:    mov.16b v0, v1
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
index 25e5194d429a7c..0030e9ce80abb4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -39,6 +39,7 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ; CHECK-LABEL: cnt32_advsimd_2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    cnt.8b v0, v0
@@ -64,6 +65,7 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) {
 ;
 ; CHECK-CSSC-LABEL: cnt32_advsimd_2:
 ; CHECK-CSSC:       // %bb.0:
+; CHECK-CSSC-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-CSSC-NEXT:    fmov w8, s0
 ; CHECK-CSSC-NEXT:    cnt w0, w8
 ; CHECK-CSSC-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index 3deabfa66b36d3..f548a0e01feee6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -88,6 +88,7 @@ entry:
 define i64 @test_rev_x_srl32(i32 %a) {
 ; CHECK-SD-LABEL: test_rev_x_srl32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    rev x8, x0
 ; CHECK-SD-NEXT:    lsr x0, x8, #32
 ; CHECK-SD-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
index 304f1b6632c2eb..da6499b7daa82e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -275,6 +275,7 @@ entry:
 define i64 @extendedLeftShiftintToint64By4(i32 %a) nounwind readnone ssp {
 ; CHECK-LABEL: extendedLeftShiftintToint64By4:
 ; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x8, x0, #4, #32
 ; CHECK-NEXT:    add x0, x8, #16
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
index 1591360f04ddf8..5806bcf0dacf16 100644
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -601,6 +601,7 @@ if.end:                                           ; preds = %for.body, %if.else
 define i32 @callVariadicFunc(i32 %cond, i32 %N) {
 ; ENABLE-LABEL: callVariadicFunc:
 ; ENABLE:       ; %bb.0: ; %entry
+; ENABLE-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; ENABLE-NEXT:    cbz w0, LBB8_2
 ; ENABLE-NEXT:  ; %bb.1: ; %if.then
 ; ENABLE-NEXT:    sub sp, sp, #64
@@ -630,6 +631,7 @@ define i32 @callVariadicFunc(i32 %cond, i32 %N) {
 ; DISABLE-NEXT:    .cfi_def_cfa w29, 16
 ; DISABLE-NEXT:    .cfi_offset w30, -8
 ; DISABLE-NEXT:    .cfi_offset w29, -16
+; DISABLE-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; DISABLE-NEXT:    cbz w0, LBB8_2
 ; DISABLE-NEXT:  ; %bb.1: ; %if.then
 ; DISABLE-NEXT:    stp x1, x1, [sp, #32]

diff --git a/llvm/test/CodeGen/AArch64/arm64-stp.ll b/llvm/test/CodeGen/AArch64/arm64-stp.ll
index dbf6e5656aaa84..a393fcb17fcb2b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-stp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-stp.ll
@@ -139,6 +139,7 @@ define void @nosplat_v4i32(i32 %v, ptr %p) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    mov x8, sp
 ; CHECK-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-NEXT:    str w0, [x8]

diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 94397e2eac0dc9..a6a825b26b3b52 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -948,6 +948,7 @@ define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
 define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
 ; CHECK-SD-LABEL: sext_v1x64:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x0, d0
 ; CHECK-SD-NEXT:    asr x1, x0, #63
 ; CHECK-SD-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
index 13684cc68b016a..a854cb7fec9917 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
@@ -21,55 +21,121 @@ define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
 }
 
 define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
-; CHECK-LABEL: tbl2_8b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbl.8b v0, { v0, v1 }, v2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbl2_8b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1 }, v2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbl2_8b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
-; CHECK-LABEL: tbl2_16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbl2_16b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbl2_16b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK-LABEL: tbl3_8b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbl3_8b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbl3_8b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK-LABEL: tbl3_16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbl3_16b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbl3_16b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK-LABEL: tbl4_8b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbl4_8b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbl4_8b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK-LABEL: tbl4_16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbl4_16b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbl4_16b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
   ret <16 x i8> %tmp3
 }
@@ -107,22 +173,32 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-SD-NEXT:    ldr d4, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-SD-NEXT:    tbl.8b v0, { v0, v1 }, v4
 ; CHECK-SD-NEXT:    tbl.8b v1, { v2, v3 }, v4
 ; CHECK-SD-NEXT:    mov.s v0[1], v1[1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI8_1
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr d4, [x8, :lo12:.LCPI8_1]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
 ; CHECK-GI-NEXT:    tbl.8b v0, { v0, v1 }, v4
 ; CHECK-GI-NEXT:    tbl.8b v1, { v2, v3 }, v4
 ; CHECK-GI-NEXT:    mov.d v0[1], v1[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI8_0]
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0 }, v1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
   %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
@@ -186,15 +262,23 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8
 define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    adrp x8, .LCPI9_0
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI9_0]
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI9_1
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI9_1]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    adrp x8, .LCPI9_0
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
 ; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v4
@@ -247,7 +331,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fmov s4, w0
 ; CHECK-SD-NEXT:    mov w8, #32 // =0x20
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    mov.b v4[1], w0
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    mov.b v4[2], w0
 ; CHECK-SD-NEXT:    mov.b v4[3], w0
 ; CHECK-SD-NEXT:    mov.b v4[4], w0
@@ -276,6 +364,10 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fmov s4, w0
 ; CHECK-GI-NEXT:    mov w8, #255 // =0xff
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    mov.b v4[1], w0
 ; CHECK-GI-NEXT:    mov.b v4[2], w0
 ; CHECK-GI-NEXT:    mov.b v4[3], w0
@@ -360,7 +452,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    mov w8, #1 // =0x1
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    fmov s4, w8
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    mov.b v4[1], w8
 ; CHECK-SD-NEXT:    mov.b v4[2], w8
 ; CHECK-SD-NEXT:    mov.b v4[3], w8
@@ -390,7 +486,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    mov w8, #1 // =0x1
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    fmov s4, w8
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    mov.b v4[1], w8
 ; CHECK-GI-NEXT:    mov.b v4[2], w8
 ; CHECK-GI-NEXT:    mov.b v4[3], w8
@@ -495,7 +595,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    movi.2d v4, #0xffffffffffffffff
 ; CHECK-SD-NEXT:    adrp x8, .LCPI12_0
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    ldr q5, [x8, :lo12:.LCPI12_0]
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    tbl.16b v2, { v2, v3 }, v5
 ; CHECK-SD-NEXT:    mov.b v4[0], w0
 ; CHECK-SD-NEXT:    mov.b v4[1], w0
@@ -515,7 +619,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x
 ; CHECK-GI-NEXT:    fmov s4, w0
 ; CHECK-GI-NEXT:    mov w8, #255 // =0xff
 ; CHECK-GI-NEXT:    adrp x9, .LCPI12_1
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q5, [x9, :lo12:.LCPI12_1]
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    mov.b v4[1], w0
 ; CHECK-GI-NEXT:    tbl.16b v2, { v2, v3 }, v5
 ; CHECK-GI-NEXT:    mov.b v4[2], w0
@@ -634,6 +742,10 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    dup.16b v4, w0
 ; CHECK-SD-NEXT:    mov w8, #255 // =0xff
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-SD-NEXT:    mov.b v4[8], w8
 ; CHECK-SD-NEXT:    mov.b v4[9], w8
 ; CHECK-SD-NEXT:    mov.b v4[10], w8
@@ -653,6 +765,10 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fmov s4, w0
 ; CHECK-GI-NEXT:    mov w8, #255 // =0xff
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    mov.b v4[1], w0
 ; CHECK-GI-NEXT:    mov.b v4[2], w0
 ; CHECK-GI-NEXT:    mov.b v4[3], w0
@@ -754,15 +870,23 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16
 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI14_0]
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI14_1
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI14_1]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
 ; CHECK-GI-NEXT:    tbl.16b v1, { v2, v3 }, v4
@@ -848,16 +972,24 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b
 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI15_0]
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI15_2
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI15_2]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI15_1
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI15_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI15_0
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
@@ -944,16 +1076,24 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8>
 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI16_0]
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI16_2
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI16_2]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI16_1
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    ldr q5, [x8, :lo12:.LCPI16_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI16_0
 ; CHECK-GI-NEXT:    tbl.16b v0, { v0, v1 }, v4
@@ -995,55 +1135,121 @@ define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
 }
 
 define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK-LABEL: tbx2_8b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbx.8b v0, { v1, v2 }, v3
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbx2_8b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
+; CHECK-SD-NEXT:    tbx.8b v0, { v1, v2 }, v3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbx2_8b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
+; CHECK-GI-NEXT:    tbx.8b v0, { v1, v2 }, v3
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK-LABEL: tbx2_16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbx.16b v0, { v1, v2 }, v3
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbx2_16b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
+; CHECK-SD-NEXT:    tbx.16b v0, { v1, v2 }, v3
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbx2_16b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
+; CHECK-GI-NEXT:    tbx.16b v0, { v1, v2 }, v3
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK-LABEL: tbx3_8b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbx.8b v0, { v1, v2, v3 }, v4
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbx3_8b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT:    tbx.8b v0, { v1, v2, v3 }, v4
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbx3_8b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT:    tbx.8b v0, { v1, v2, v3 }, v4
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK-LABEL: tbx3_16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbx.16b v0, { v1, v2, v3 }, v4
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbx3_16b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-SD-NEXT:    tbx.16b v0, { v1, v2, v3 }, v4
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbx3_16b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-GI-NEXT:    tbx.16b v0, { v1, v2, v3 }, v4
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
-; CHECK-LABEL: tbx4_8b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbx.8b v0, { v1, v2, v3, v4 }, v5
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbx4_8b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT:    tbx.8b v0, { v1, v2, v3, v4 }, v5
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbx4_8b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT:    tbx.8b v0, { v1, v2, v3, v4 }, v5
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
-; CHECK-LABEL: tbx4_16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbx.16b v0, { v1, v2, v3, v4 }, v5
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: tbx4_16b:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-SD-NEXT:    tbx.16b v0, { v1, v2, v3, v4 }, v5
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: tbx4_16b:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-GI-NEXT:    tbx.16b v0, { v1, v2, v3, v4 }, v5
+; CHECK-GI-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
   ret <16 x i8> %tmp3
 }

diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index b6dc0c83c5ac93..38a568ac919168 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1267,6 +1267,7 @@ define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    addhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1274,6 +1275,7 @@ define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add v1.8h, v1.8h, v2.8h
 ; CHECK-GI-NEXT:    shrn2 v0.16b, v1.8h, #8
 ; CHECK-GI-NEXT:    ret
@@ -1291,6 +1293,7 @@ define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    addhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1298,6 +1301,7 @@ define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add v1.4s, v1.4s, v2.4s
 ; CHECK-GI-NEXT:    shrn2 v0.8h, v1.4s, #16
 ; CHECK-GI-NEXT:    ret
@@ -1315,6 +1319,7 @@ define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    addhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1322,6 +1327,7 @@ define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add v1.2d, v1.2d, v2.2d
 ; CHECK-GI-NEXT:    shrn2 v0.4s, v1.2d, #32
 ; CHECK-GI-NEXT:    ret
@@ -1439,6 +1445,7 @@ define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    subhn2 v0.16b, v1.8h, v2.8h
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1446,6 +1453,7 @@ define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    sub v1.8h, v1.8h, v2.8h
 ; CHECK-GI-NEXT:    shrn2 v0.16b, v1.8h, #8
 ; CHECK-GI-NEXT:    ret
@@ -1463,6 +1471,7 @@ define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    subhn2 v0.8h, v1.4s, v2.4s
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1470,6 +1479,7 @@ define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    sub v1.4s, v1.4s, v2.4s
 ; CHECK-GI-NEXT:    shrn2 v0.8h, v1.4s, #16
 ; CHECK-GI-NEXT:    ret
@@ -1487,6 +1497,7 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ldr q1, [x0]
 ; CHECK-SD-NEXT:    ldr q2, [x1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    subhn2 v0.4s, v1.2d, v2.2d
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1494,6 +1505,7 @@ define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, ptr %A, ptr %B) nounwind {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldr q1, [x0]
 ; CHECK-GI-NEXT:    ldr q2, [x1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    sub v1.2d, v1.2d, v2.2d
 ; CHECK-GI-NEXT:    shrn2 v0.4s, v1.2d, #32
 ; CHECK-GI-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/arm64-vaddv.ll b/llvm/test/CodeGen/AArch64/arm64-vaddv.ll
index 4a23b3a984eb62..adfe28ea95893b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vaddv.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vaddv.ll
@@ -18,7 +18,9 @@ define <8 x i8> @test_vaddv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
 ; CHECK-LABEL: test_vaddv_s8_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addv b1, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[3], v1.b[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a2)
@@ -43,7 +45,9 @@ define <4 x i16> @test_vaddv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
 ; CHECK-LABEL: test_vaddv_s16_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addv h1, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a2)
@@ -68,7 +72,9 @@ define <2 x i32> @test_vaddv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
 ; CHECK-LABEL: test_vaddv_s32_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addp v1.2s, v1.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a2)
@@ -115,7 +121,9 @@ define <8 x i8> @test_vaddv_u8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
 ; CHECK-LABEL: test_vaddv_u8_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addv b1, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[3], v1.b[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a2)
@@ -152,7 +160,9 @@ define <4 x i16> @test_vaddv_u16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
 ; CHECK-LABEL: test_vaddv_u16_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addv h1, v1.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a2)
@@ -189,7 +199,9 @@ define <2 x i32> @test_vaddv_u32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
 ; CHECK-LABEL: test_vaddv_u32_used_by_laneop:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addp v1.2s, v1.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a2)
@@ -255,6 +267,7 @@ define <1 x i64> @test_vaddv_u64_to_vec(<2 x i64> %a1) {
 ; CHECK-LABEL: test_vaddv_u64_to_vec:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vaddv.i = tail call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a1)

diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
index e5764a0a478816..d4cc154ac6afc0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll
@@ -256,6 +256,7 @@ define <2 x bfloat> @test_vcvt_bf16_f64(<2 x double> %v) nounwind readnone ssp {
 define half @test_vcvt_f16_f32(<1 x float> %x) {
 ; GENERIC-LABEL: test_vcvt_f16_f32:
 ; GENERIC:       // %bb.0:
+; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GENERIC-NEXT:    fcvt h0, s0
 ; GENERIC-NEXT:    ret
 ;
@@ -282,6 +283,7 @@ define half @test_vcvt_f16_f32(<1 x float> %x) {
 define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
 ; GENERIC-LABEL: test_vcvt_high_f32_f64:
 ; GENERIC:       // %bb.0:
+; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GENERIC-NEXT:    fcvtn2 v0.4s, v1.2d
 ; GENERIC-NEXT:    ret
 ;
@@ -295,6 +297,7 @@ define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) noun
 ;
 ; GISEL-LABEL: test_vcvt_high_f32_f64:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    fcvtn2 v0.4s, v1.2d
 ; GISEL-NEXT:    ret
   %cvt = fptrunc <2 x double> %v to <2 x float>
@@ -319,6 +322,7 @@ define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp {
 define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp {
 ; GENERIC-LABEL: test_vcvtx_high_f32_f64:
 ; GENERIC:       // %bb.0:
+; GENERIC-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GENERIC-NEXT:    fcvtxn2 v0.4s, v1.2d
 ; GENERIC-NEXT:    ret
 ;
@@ -332,6 +336,7 @@ define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nou
 ;
 ; GISEL-LABEL: test_vcvtx_high_f32_f64:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    fcvtxn2 v0.4s, v1.2d
 ; GISEL-NEXT:    ret
   %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind

diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll b/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
index 4e167b3c7ce1cd..94074d1689f6a2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-insertion.ll
@@ -5,6 +5,7 @@ define void @test0f(ptr nocapture %x, float %a) #0 {
 ; CHECK-LABEL: test0f:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
@@ -18,6 +19,7 @@ define void @test1f(ptr nocapture %x, float %a) #0 {
 ; CHECK-LABEL: test1f:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fmov.4s v1, #1.00000000
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
@@ -104,6 +106,7 @@ define <16 x i8> @test_insert_v16i8_insert_2_undef_base_different_valeus(i8 %a,
 define <8 x half> @test_insert_v8f16_insert_1(half %a) {
 ; CHECK-LABEL: test_insert_v8f16_insert_1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup.8h v0, v0[0]
 ; CHECK-NEXT:    mov.h v0[7], wzr
 ; CHECK-NEXT:    ret
@@ -122,6 +125,7 @@ define <8 x half> @test_insert_v8f16_insert_2(half %a) {
 ; CHECK-LABEL: test_insert_v8f16_insert_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    mov.h v1[1], v0[0]
 ; CHECK-NEXT:    mov.h v1[2], v0[0]
 ; CHECK-NEXT:    mov.16b v0, v1
@@ -197,6 +201,7 @@ define <2 x float> @test_insert_v2f32_undef_zero_vector(float %a) {
 ; CHECK-LABEL: test_insert_v2f32_undef_zero_vector:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi d1, #0000000000000000
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov.s v1[1], v0[0]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -207,6 +212,7 @@ define <2 x float> @test_insert_v2f32_undef_zero_vector(float %a) {
 define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
 ; CHECK-LABEL: test_insert_3_f32_undef_zero_vector:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[0]
 ; CHECK-NEXT:    mov.s v0[3], wzr
 ; CHECK-NEXT:    ret
@@ -219,6 +225,7 @@ define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
 define <4 x float> @test_insert_3_f32_undef(float %a) {
 ; CHECK-LABEL: test_insert_3_f32_undef:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup.4s v0, v0[0]
 ; CHECK-NEXT:    ret
   %v.0 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %a, i32 0
@@ -231,6 +238,7 @@ define <4 x float> @test_insert_2_f32_undef_zero(float %a) {
 ; CHECK-LABEL: test_insert_2_f32_undef_zero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    mov.s v1[2], v0[0]
 ; CHECK-NEXT:    mov.16b v0, v1
@@ -244,6 +252,7 @@ define <2 x double> @test_insert_v2f64_undef_insert1(double %a) {
 ; CHECK-LABEL: test_insert_v2f64_undef_insert1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi.2d v1, #0000000000000000
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov.d v1[0], v0[0]
 ; CHECK-NEXT:    mov.16b v0, v1
 ; CHECK-NEXT:    ret
@@ -254,6 +263,7 @@ define <2 x double> @test_insert_v2f64_undef_insert1(double %a) {
 define <4 x float> @test_insert_2_f32_var(float %a, <4 x float> %b) {
 ; CHECK-LABEL: test_insert_2_f32_var:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov.s v1[0], v0[0]
 ; CHECK-NEXT:    mov.s v1[2], v0[0]
 ; CHECK-NEXT:    mov.16b v0, v1
@@ -276,7 +286,9 @@ define <8 x i16> @test_insert_v8i16_i16_zero(<8 x i16> %a) {
 define <4 x half> @test_insert_v4f16_f16_zero(<4 x half> %a) {
 ; CHECK-LABEL: test_insert_v4f16_f16_zero:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov.h v0[0], wzr
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v.0 = insertelement <4 x half> %a, half 0.000000e+00, i32 0
   ret <4 x half> %v.0
@@ -294,7 +306,9 @@ define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
 define <2 x float> @test_insert_v2f32_f32_zero(<2 x float> %a) {
 ; CHECK-LABEL: test_insert_v2f32_f32_zero:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov.s v0[0], wzr
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v.0 = insertelement <2 x float> %a, float 0.000000e+00, i32 0
   ret <2 x float> %v.0

diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index ebb7062a1faff5..499786470d4ac1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -918,6 +918,7 @@ define <2 x double> @fmls_commuted_neg_2d(ptr %A, ptr %B, ptr %C) nounwind {
 define <2 x float> @fmls_indexed_2s(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
 ; CHECK-LABEL: fmls_indexed_2s:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmls.2s v0, v2, v1[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -954,6 +955,7 @@ entry:
 define <2 x float> @fmla_indexed_scalar_2s(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp {
 ; CHECK-LABEL: fmla_indexed_scalar_2s:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $d2
 ; CHECK-NEXT:    fmla.2s v0, v1, v2
 ; CHECK-NEXT:    ret
 entry:
@@ -966,6 +968,7 @@ entry:
 define <4 x float> @fmla_indexed_scalar_4s(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp {
 ; CHECK-LABEL: fmla_indexed_scalar_4s:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmla.4s v0, v1, v2[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -980,6 +983,7 @@ entry:
 define <2 x double> @fmla_indexed_scalar_2d(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp {
 ; CHECK-LABEL: fmla_indexed_scalar_2d:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla.2d v0, v1, v2[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -992,6 +996,7 @@ entry:
 define <2 x float> @fmls_indexed_2s_strict(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp strictfp {
 ; CHECK-LABEL: fmls_indexed_2s_strict:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmls.2s v0, v2, v1[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1028,6 +1033,7 @@ entry:
 define <2 x float> @fmla_indexed_scalar_2s_strict(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp strictfp {
 ; CHECK-LABEL: fmla_indexed_scalar_2s_strict:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmla.2s v0, v1, v2[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1040,6 +1046,7 @@ entry:
 define <4 x float> @fmla_indexed_scalar_4s_strict(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp strictfp {
 ; CHECK-LABEL: fmla_indexed_scalar_4s_strict:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    fmla.4s v0, v1, v2[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1054,6 +1061,7 @@ entry:
 define <2 x double> @fmla_indexed_scalar_2d_strict(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp strictfp {
 ; CHECK-LABEL: fmla_indexed_scalar_2d_strict:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla.2d v0, v1, v2[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1072,6 +1080,7 @@ declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x
 define <4 x i16> @mul_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: mul_4h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul.4h v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1092,6 +1101,7 @@ define <8 x i16> @mul_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <2 x i32> @mul_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: mul_2s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul.2s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1128,6 +1138,7 @@ define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind {
 define <2 x float> @fmul_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {
 ; CHECK-LABEL: fmul_lane_2s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul.2s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1>
@@ -1180,6 +1191,7 @@ define double @fmul_lane_d(double %A, <2 x double> %vec) nounwind {
 define <2 x float> @fmulx_lane_2s(<2 x float> %A, <2 x float> %B) nounwind {
 ; CHECK-LABEL: fmulx_lane_2s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx.2s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x float> %B, <2 x float> poison, <2 x i32> <i32 1, i32 1>
@@ -1210,6 +1222,7 @@ define <2 x double> @fmulx_lane_2d(<2 x double> %A, <2 x double> %B) nounwind {
 define <4 x i16> @sqdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: sqdmulh_lane_4h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh.4h v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1230,6 +1243,7 @@ define <8 x i16> @sqdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <2 x i32> @sqdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: sqdmulh_lane_2s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmulh.2s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1262,6 +1276,7 @@ define i32 @sqdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
 define <4 x i16> @sqrdmulh_lane_4h(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: sqrdmulh_lane_4h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh.4h v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1282,6 +1297,7 @@ define <8 x i16> @sqrdmulh_lane_8h(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <2 x i32> @sqrdmulh_lane_2s(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: sqrdmulh_lane_2s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqrdmulh.2s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1314,6 +1330,7 @@ define i32 @sqrdmulh_lane_1s(i32 %A, <4 x i32> %B) nounwind {
 define <4 x i32> @sqdmull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: sqdmull_lane_4s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull.4s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1324,6 +1341,7 @@ define <4 x i32> @sqdmull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 define <2 x i64> @sqdmull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: sqdmull_lane_2d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull.2d v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1356,6 +1374,7 @@ define <2 x i64> @sqdmull2_lane_2d(<4 x i32> %A, <4 x i32> %B) nounwind {
 define <4 x i32> @umull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: umull_lane_4s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull.4s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1366,6 +1385,7 @@ define <4 x i32> @umull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 define <2 x i64> @umull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: umull_lane_2d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umull.2d v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1376,6 +1396,7 @@ define <2 x i64> @umull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
 define <4 x i32> @smull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 ; CHECK-LABEL: smull_lane_4s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull.4s v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <4 x i16> %B, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1386,6 +1407,7 @@ define <4 x i32> @smull_lane_4s(<4 x i16> %A, <4 x i16> %B) nounwind {
 define <2 x i64> @smull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
 ; CHECK-LABEL: smull_lane_2d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smull.2d v0, v0, v1[1]
 ; CHECK-NEXT:    ret
   %tmp3 = shufflevector <2 x i32> %B, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
@@ -1396,6 +1418,7 @@ define <2 x i64> @smull_lane_2d(<2 x i32> %A, <2 x i32> %B) nounwind {
 define <4 x i32> @smlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: smlal_lane_4s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smlal.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1408,6 +1431,7 @@ define <4 x i32> @smlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi
 define <2 x i64> @smlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: smlal_lane_2d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smlal.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1420,6 +1444,7 @@ define <2 x i64> @smlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwi
 define <4 x i32> @sqdmlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: sqdmlal_lane_4s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmlal.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1432,6 +1457,7 @@ define <4 x i32> @sqdmlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) noun
 define <2 x i64> @sqdmlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: sqdmlal_lane_2d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmlal.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1472,6 +1498,7 @@ define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s1, w1
 ; CHECK-NEXT:    fmov s2, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqdmlal.h s2, h1, v0[1]
 ; CHECK-NEXT:    fmov w0, s2
 ; CHECK-NEXT:    ret
@@ -1489,6 +1516,7 @@ define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s1, w1
 ; CHECK-NEXT:    fmov s2, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqdmlsl.h s2, h1, v0[1]
 ; CHECK-NEXT:    fmov w0, s2
 ; CHECK-NEXT:    ret
@@ -1538,6 +1566,7 @@ define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d1, x0
 ; CHECK-NEXT:    fmov s2, w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqdmlal.s d1, s2, v0[1]
 ; CHECK-NEXT:    fmov x0, d1
 ; CHECK-NEXT:    ret
@@ -1554,6 +1583,7 @@ define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d1, x0
 ; CHECK-NEXT:    fmov s2, w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqdmlsl.s d1, s2, v0[1]
 ; CHECK-NEXT:    fmov x0, d1
 ; CHECK-NEXT:    ret
@@ -1568,6 +1598,7 @@ declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64)
 define <4 x i32> @umlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: umlal_lane_4s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umlal.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1580,6 +1611,7 @@ define <4 x i32> @umlal_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi
 define <2 x i64> @umlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: umlal_lane_2d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umlal.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1593,6 +1625,7 @@ define <2 x i64> @umlal_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwi
 define <4 x i32> @smlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: smlsl_lane_4s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smlsl.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1605,6 +1638,7 @@ define <4 x i32> @smlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi
 define <2 x i64> @smlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: smlsl_lane_2d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    smlsl.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1617,6 +1651,7 @@ define <2 x i64> @smlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwi
 define <4 x i32> @sqdmlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: sqdmlsl_lane_4s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmlsl.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1629,6 +1664,7 @@ define <4 x i32> @sqdmlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) noun
 define <2 x i64> @sqdmlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: sqdmlsl_lane_2d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmlsl.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1667,6 +1703,7 @@ define <2 x i64> @sqdmlsl2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nou
 define <4 x i32> @umlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwind {
 ; CHECK-LABEL: umlsl_lane_4s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umlsl.4s v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1679,6 +1716,7 @@ define <4 x i32> @umlsl_lane_4s(<4 x i16> %A, <4 x i16> %B, <4 x i32> %C) nounwi
 define <2 x i64> @umlsl_lane_2d(<2 x i32> %A, <2 x i32> %B, <2 x i64> %C) nounwind {
 ; CHECK-LABEL: umlsl_lane_2d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umlsl.2d v2, v0, v1[1]
 ; CHECK-NEXT:    mov.16b v0, v2
 ; CHECK-NEXT:    ret
@@ -1835,6 +1873,7 @@ define <2 x i64> @foo5(<4 x i32> %a, <4 x i32> %b) nounwind {
 define <4 x i32> @foo6(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo6:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smull2.4s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1849,6 +1888,7 @@ entry:
 define <4 x i32> @foo6a(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo6a:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smull.4s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1863,6 +1903,7 @@ entry:
 define <2 x i64> @foo7(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo7:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smull2.2d v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1877,6 +1918,7 @@ entry:
 define <2 x i64> @foo7a(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo7a:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smull.2d v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1892,6 +1934,7 @@ entry:
 define <4 x i32> @foo8(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umull2.4s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1906,6 +1949,7 @@ entry:
 define <4 x i32> @foo8a(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo8a:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umull.4s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1920,6 +1964,7 @@ entry:
 define <2 x i64> @foo9(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo9:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umull2.2d v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1934,6 +1979,7 @@ entry:
 define <2 x i64> @foo9a(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: foo9a:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umull.2d v0, v1, v2[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -2044,6 +2090,7 @@ define <2 x i64> @bar5(<2 x i64> %a, <4 x i32> %b, <4 x i32> %c) nounwind {
 define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
 ; CHECK-LABEL: mlal2_1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2.4s v0, v1, v2[3]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -2061,6 +2108,7 @@ define <4 x i32> @mlal2_1(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
 define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
 ; CHECK-LABEL: mlal2_2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    smlal2.2d v0, v1, v2[1]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -2078,6 +2126,7 @@ define <2 x i64> @mlal2_2(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
 define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
 ; CHECK-LABEL: mlal2_4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2.4s v0, v1, v2[2]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <4 x i16> %c, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
@@ -2095,6 +2144,7 @@ define <4 x i32> @mlal2_4(<4 x i32> %a, <8 x i16> %b, <4 x i16> %c) nounwind {
 define <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
 ; CHECK-LABEL: mlal2_5:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umlal2.2d v0, v1, v2[0]
 ; CHECK-NEXT:    ret
   %shuffle = shufflevector <2 x i32> %c, <2 x i32> undef, <4 x i32> zeroinitializer
@@ -2113,6 +2163,7 @@ define <2 x i64> @mlal2_5(<2 x i64> %a, <4 x i32> %b, <2 x i32> %c) nounwind {
 define <2 x double> @vmulq_n_f64(<2 x double> %x, double %y) nounwind readnone ssp {
 ; CHECK-LABEL: vmulq_n_f64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul.2d v0, v0, v1[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2125,6 +2176,7 @@ entry:
 define <4 x float> @vmulq_n_f32(<4 x float> %x, float %y) nounwind readnone ssp {
 ; CHECK-LABEL: vmulq_n_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fmul.4s v0, v0, v1[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2139,6 +2191,7 @@ entry:
 define <2 x float> @vmul_n_f32(<2 x float> %x, float %y) nounwind readnone ssp {
 ; CHECK-LABEL: vmul_n_f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    fmul.2s v0, v0, v1[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -2347,6 +2400,7 @@ define <4 x i32> @vmul_built_dup_test(<4 x i32> %a, <4 x i32> %b) {
 define <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: vmul_built_dup_fromsmall_test:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul.4h v0, v0, v1[3]
 ; CHECK-NEXT:    ret
   %vget_lane = extractelement <4 x i16> %b, i32 3
@@ -2361,6 +2415,7 @@ define <4 x i16> @vmul_built_dup_fromsmall_test(<4 x i16> %a, <4 x i16> %b) {
 define <8 x i16> @vmulq_built_dup_fromsmall_test(<8 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: vmulq_built_dup_fromsmall_test:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mul.8h v0, v0, v1[0]
 ; CHECK-NEXT:    ret
   %vget_lane = extractelement <4 x i16> %b, i32 0
@@ -2464,6 +2519,7 @@ define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) {
 define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs) {
 ; CHECK-LABEL: pmull_from_extract_duplane_low:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    dup.8b v1, v1[0]
 ; CHECK-NEXT:    pmull.8h v0, v0, v1
 ; CHECK-NEXT:    ret
@@ -2477,6 +2533,7 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs)
 define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) {
 ; CHECK-LABEL: pmull_from_extract_duplane_high:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    dup.16b v1, v1[0]
 ; CHECK-NEXT:    pmull2.8h v0, v0, v1
 ; CHECK-NEXT:    ret
@@ -2576,6 +2633,7 @@ define float @scalar_fmla_from_extract_v4f32(float %accum, float %lhs, <4 x floa
 define float @scalar_fmla_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
 ; CHECK-LABEL: scalar_fmla_from_extract_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla.s s0, s1, v2[1]
 ; CHECK-NEXT:    ret
   %rhs = extractelement <2 x float> %rvec, i32 1
@@ -2597,6 +2655,7 @@ define float @scalar_fmls_from_extract_v4f32(float %accum, float %lhs, <4 x floa
 define float @scalar_fmls_from_extract_v2f32(float %accum, float %lhs, <2 x float> %rvec) {
 ; CHECK-LABEL: scalar_fmls_from_extract_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls.s s0, s1, v2[1]
 ; CHECK-NEXT:    ret
   %rhs.scal = extractelement <2 x float> %rvec, i32 1
@@ -2644,6 +2703,7 @@ define <2 x float> @fmls_with_fneg_before_extract_v2f32(<2 x float> %accum, <2 x
 define <2 x float> @fmls_with_fneg_before_extract_v2f32_1(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) {
 ; CHECK-LABEL: fmls_with_fneg_before_extract_v2f32_1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls.2s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
   %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
@@ -2666,6 +2726,7 @@ define <4 x float> @fmls_with_fneg_before_extract_v4f32(<4 x float> %accum, <4 x
 define <4 x float> @fmls_with_fneg_before_extract_v4f32_1(<4 x float> %accum, <4 x float> %lhs, <2 x float> %rhs) {
 ; CHECK-LABEL: fmls_with_fneg_before_extract_v4f32_1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls.4s v0, v1, v2[1]
 ; CHECK-NEXT:    ret
   %rhs_neg = fsub <2 x float> <float -0.0, float -0.0>, %rhs
@@ -2792,6 +2853,8 @@ declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64)
 define <1 x i64> @test_mul_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) nounwind {
 ; CHECK-LABEL: test_mul_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d1
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mul x8, x9, x8

diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index b542bca716d9b2..9955b253f563e9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -219,6 +219,8 @@ define <8 x i16> @vzip2_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind {
 define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-LABEL: combine_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1.16b v0, v0, v1
 ; CHECK-NEXT:    ret
   %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -228,6 +230,8 @@ define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) {
 define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-SD-LABEL: combine2_v16i8:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -247,6 +251,8 @@ define <16 x i8> @combine2_v16i8(<8 x i8> %0, <8 x i8> %1) {
 define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
 ; CHECK-LABEL: combine_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-NEXT:    ret
   %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -256,6 +262,8 @@ define <8 x i16> @combine_v8i16(<4 x i16> %0, <4 x i16> %1) {
 define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
 ; CHECK-SD-LABEL: combine2_v8i16:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -275,6 +283,8 @@ define <8 x i16> @combine2_v8i16(<4 x i16> %0, <4 x i16> %1) {
 define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
 ; CHECK-LABEL: combine_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1.4s v0, v0, v1
 ; CHECK-NEXT:    ret
   %3 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -284,6 +294,8 @@ define <4 x i32> @combine_v4i32(<2 x i32> %0, <2 x i32> %1) {
 define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
 ; CHECK-SD-LABEL: combine2_v4i32:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    zip1.4s v0, v0, v1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -303,6 +315,8 @@ define <4 x i32> @combine2_v4i32(<2 x i32> %0, <2 x i32> %1) {
 define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-LABEL: combine_v16i8_undef:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1.16b v0, v0, v1
 ; CHECK-NEXT:    ret
   %3 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -312,6 +326,8 @@ define <16 x i8> @combine_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
 define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-SD-LABEL: combine2_v16i8_undef:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -331,6 +347,8 @@ define <16 x i8> @combine2_v16i8_undef(<8 x i8> %0, <8 x i8> %1) {
 define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
 ; CHECK-LABEL: combine_v8i16_undef:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1.8h v0, v0, v1
 ; CHECK-NEXT:    ret
   %3 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 undef, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
@@ -341,18 +359,20 @@ define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) {
 define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-SD-LABEL: combine_v8i16_8first:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fmov d31, d1
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
 ; CHECK-SD-NEXT:    adrp x8, .LCPI25_0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI25_0]
-; CHECK-SD-NEXT:    tbl.16b v0, { v31, v0 }, v1
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI25_0]
+; CHECK-SD-NEXT:    tbl.16b v0, { v1, v2 }, v3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: combine_v8i16_8first:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov d2, d0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI25_0
-; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI25_0]
-; CHECK-GI-NEXT:    tbl.16b v0, { v1, v2 }, v0
+; CHECK-GI-NEXT:    fmov d31, d1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI25_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v31, v0 }, v2
 ; CHECK-GI-NEXT:    ret
   %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
   ret <16 x i8> %3
@@ -363,18 +383,20 @@ define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) {
 define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) {
 ; CHECK-SD-LABEL: combine_v8i16_8firstundef:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    fmov d31, d1
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1_q2
 ; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
-; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI26_0]
-; CHECK-SD-NEXT:    tbl.16b v0, { v31, v0 }, v1
+; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI26_0]
+; CHECK-SD-NEXT:    tbl.16b v0, { v1, v2 }, v3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: combine_v8i16_8firstundef:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    fmov d2, d0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI26_0
-; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI26_0]
-; CHECK-GI-NEXT:    tbl.16b v0, { v1, v2 }, v0
+; CHECK-GI-NEXT:    fmov d31, d1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI26_0]
+; CHECK-GI-NEXT:    tbl.16b v0, { v31, v0 }, v2
 ; CHECK-GI-NEXT:    ret
   %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 undef>
   ret <16 x i8> %3

diff --git a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
index cc70dc446324da..ad073d96a14830 100644
--- a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll
@@ -43,6 +43,7 @@ define i8 @test_valid_wrap_optimizable2(ptr %base, i32 %offset) {
 ; CHECK-LABEL: test_valid_wrap_optimizable2:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    mov w8, #-100 ; =0xffffff9c
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    ldrb w0, [x8, w1, sxtw]
 ; CHECK-NEXT:    ret
 

diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 6e7398b63e6d8f..6aeeeed94543d3 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -410,6 +410,7 @@ define %T2 @simple_struct(%T1 %0, %T2 %1, %T3, %T4) nounwind {
 ; CHECK-NEXT:     ldr     x8, [x2]
 ; CHECK-NEXT:     ldr     x10, [x3]
 ; CHECK-NEXT:     ldr     d1, [x2, #8]
+; CHECK-NEXT:                                     // kill: def $w1 killed $w1 killed $x1
 ; CHECK-NEXT:     ldr     s0, [sp, #12]
 ; CHECK-NEXT:     ldr     d2, [x3, #8]
 ; CHECK-NEXT:     mov     x2, x8
@@ -510,6 +511,7 @@ define <4 x i8> @small_vector(<4 x i8> %0) {
 ; CHECK-NEXT:     str	w0, [sp, #12]
 ; CHECK-NEXT:     ldr	s0, [sp, #12]
 ; CHECK-NEXT:     ushll	v0.8h, v0.8b, #0
+; CHECK-NEXT:                                           // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:     blr	x9
 ; CHECK-NEXT:     uzp1	v0.8b, v0.8b, v0.8b
 ; CHECK-NEXT:     adrp	x9, __os_arm64x_dispatch_ret

diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
index e7c6c643d8adeb..dcc675839b714b 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
@@ -424,6 +424,7 @@ declare %T2 @simple_struct(%T1, %T2, %T3, %T4) nounwind;
 ; CHECK-NEXT:     str     x8, [sp, #40]
 ; CHECK-NEXT:     mov     x0, x8
 ; CHECK-NEXT:     ldr     s0, [sp, #44]
+; CHECK-NEXT:                                     // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:     .seh_startepilogue
 ; CHECK-NEXT:     ldp     x29, x30, [sp, #96]             // 16-byte Folded Reload
 ; CHECK-NEXT:     .seh_save_fplr  96
@@ -476,6 +477,7 @@ declare <4 x i8> @small_vector(<4 x i8> %0) nounwind;
 ; CHECK-NEXT:     stur	w8, [x29, #-8]
 ; CHECK-NEXT:     ldur	s0, [x29, #-8]
 ; CHECK-NEXT:     ushll	v0.8h, v0.8b, #0
+; CHECK-NEXT:                                           // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:     .seh_startepilogue
 ; CHECK-NEXT:     ldp	x29, x30, [sp, #48]             // 16-byte Folded Reload
 ; CHECK-NEXT:     .seh_save_fplr	48

diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
index 884dd3ac9860b7..70f3b5cc488ea8 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll
@@ -562,6 +562,7 @@ define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -589,6 +590,7 @@ define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -880,6 +882,7 @@ define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -907,6 +910,7 @@ define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -7226,6 +7230,7 @@ define dso_local i8 @test_atomic_load_umax_i8_acq_rel(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -7253,6 +7258,7 @@ define dso_local i16 @test_atomic_load_umax_i16_acq_rel(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -7384,6 +7390,7 @@ define dso_local i8 @test_atomic_load_umax_i8_acquire(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
@@ -7411,6 +7418,7 @@ define dso_local i16 @test_atomic_load_umax_i16_acquire(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
@@ -7542,6 +7550,7 @@ define dso_local i8 @test_atomic_load_umax_i8_monotonic(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
@@ -7569,6 +7578,7 @@ define dso_local i16 @test_atomic_load_umax_i16_monotonic(i16 %offset) nounwind
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
@@ -7700,6 +7710,7 @@ define dso_local i8 @test_atomic_load_umax_i8_release(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset release
 ; CHECK-NOT: dmb
@@ -7727,6 +7738,7 @@ define dso_local i16 @test_atomic_load_umax_i16_release(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset release
 ; CHECK-NOT: dmb
@@ -7858,6 +7870,7 @@ define dso_local i8 @test_atomic_load_umax_i8_seq_cst(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -7885,6 +7898,7 @@ define dso_local i16 @test_atomic_load_umax_i16_seq_cst(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -8016,6 +8030,7 @@ define dso_local i8 @test_atomic_load_umin_i8_acq_rel(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -8043,6 +8058,7 @@ define dso_local i16 @test_atomic_load_umin_i16_acq_rel(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acq_rel
 ; CHECK-NOT: dmb
@@ -8174,6 +8190,7 @@ define dso_local i8 @test_atomic_load_umin_i8_acquire(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset acquire
 ; CHECK-NOT: dmb
@@ -8201,6 +8218,7 @@ define dso_local i16 @test_atomic_load_umin_i16_acquire(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acquire
 ; CHECK-NOT: dmb
@@ -8332,6 +8350,7 @@ define dso_local i8 @test_atomic_load_umin_i8_monotonic(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset monotonic
 ; CHECK-NOT: dmb
@@ -8359,6 +8378,7 @@ define dso_local i16 @test_atomic_load_umin_i16_monotonic(i16 %offset) nounwind
 ; OUTLINE-ATOMICS-NEXT:    stxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset monotonic
 ; CHECK-NOT: dmb
@@ -8490,6 +8510,7 @@ define dso_local i8 @test_atomic_load_umin_i8_release(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset release
 ; CHECK-NOT: dmb
@@ -8517,6 +8538,7 @@ define dso_local i16 @test_atomic_load_umin_i16_release(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset release
 ; CHECK-NOT: dmb
@@ -8648,6 +8670,7 @@ define dso_local i8 @test_atomic_load_umin_i8_seq_cst(i8 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrb w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset seq_cst
 ; CHECK-NOT: dmb
@@ -8675,6 +8698,7 @@ define dso_local i16 @test_atomic_load_umin_i16_seq_cst(i16 %offset) nounwind {
 ; OUTLINE-ATOMICS-NEXT:    stlxrh w11, w10, [x8]
 ; OUTLINE-ATOMICS-NEXT:    cbnz w11, .LBB[[LOOPSTART]]
 ; OUTLINE-ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset seq_cst
 ; CHECK-NOT: dmb

diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
index bbf0afec219b00..fdb14606d463b7 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
@@ -374,6 +374,7 @@ define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
 ; CHECK-NEXT:  .LBB20_1: // %atomicrmw.start
@@ -391,6 +392,7 @@ define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    adrp x9, var16
 ; CHECK-NEXT:    add x9, x9, :lo12:var16
 ; CHECK-NEXT:  .LBB21_1: // %atomicrmw.start
@@ -418,6 +420,7 @@ define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; CHECK-NEXT:    stlxr w10, w8, [x9]
 ; CHECK-NEXT:    cbnz w10, .LBB22_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw xchg ptr @var32, i32 %offset release
    ret i32 %old
@@ -613,6 +616,7 @@ define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-NEXT:    stxrb w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB32_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset monotonic
    ret i8 %old
@@ -632,6 +636,7 @@ define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-NEXT:    stxrh w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB33_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acquire
    ret i16 %old
@@ -690,6 +695,7 @@ define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-NEXT:    stlxrb w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB36_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acq_rel
    ret i8 %old
@@ -709,6 +715,7 @@ define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-NEXT:    stxrh w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB37_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset monotonic
    ret i16 %old
@@ -756,6 +763,7 @@ define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and w8, w0, #0xff
 ; CHECK-NEXT:    adrp x9, var8
 ; CHECK-NEXT:    add x9, x9, :lo12:var8
@@ -769,9 +777,11 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; CHECK-NEXT:    stxrb w10, w1, [x9]
 ; CHECK-NEXT:    cbnz w10, .LBB40_1
 ; CHECK-NEXT:  // %bb.3: // %cmpxchg.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB40_4: // %cmpxchg.nostore
 ; CHECK-NEXT:    clrex
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire
    %old = extractvalue { i8, i1 } %pair, 0
@@ -781,6 +791,7 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-LABEL: test_atomic_cmpxchg_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and w8, w0, #0xffff
 ; CHECK-NEXT:    adrp x9, var16
 ; CHECK-NEXT:    add x9, x9, :lo12:var16
@@ -795,9 +806,11 @@ define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; CHECK-NEXT:    cbnz w10, .LBB41_1
 ; CHECK-NEXT:  // %bb.3: // %cmpxchg.success
 ; CHECK-NEXT:    dmb ish
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB41_4: // %cmpxchg.nostore
 ; CHECK-NEXT:    clrex
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new seq_cst seq_cst
    %old = extractvalue { i16, i1 } %pair, 0
@@ -820,9 +833,11 @@ define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; CHECK-NEXT:    stlxr w10, w1, [x9]
 ; CHECK-NEXT:    cbnz w10, .LBB42_1
 ; CHECK-NEXT:  // %bb.3: // %cmpxchg.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB42_4: // %cmpxchg.nostore
 ; CHECK-NEXT:    clrex
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new release monotonic
    %old = extractvalue { i32, i1 } %pair, 0

diff  --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
index a7311154c4f5d7..679065529090f0 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -558,6 +558,7 @@ define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_load_xchg_i8:
 ; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; INLINE_ATOMICS-NEXT:    adrp x9, var8
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
 ; INLINE_ATOMICS-NEXT:  .LBB20_1: // %atomicrmw.start
@@ -584,6 +585,7 @@ define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
 define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_load_xchg_i16:
 ; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; INLINE_ATOMICS-NEXT:    adrp x9, var16
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
 ; INLINE_ATOMICS-NEXT:  .LBB21_1: // %atomicrmw.start
@@ -619,6 +621,7 @@ define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
 ; INLINE_ATOMICS-NEXT:    stlxr w10, w8, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB22_1
 ; INLINE_ATOMICS-NEXT:  // %bb.2: // %atomicrmw.end
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_load_xchg_i32:
@@ -830,6 +833,7 @@ define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-NEXT:    stxrb w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB32_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umin ptr @var8, i8 %offset monotonic
    ret i8 %old
@@ -849,6 +853,7 @@ define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-NEXT:    stxrh w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB33_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umin ptr @var16, i16 %offset acquire
    ret i16 %old
@@ -906,6 +911,7 @@ define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-NEXT:    stlxrb w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB36_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umax ptr @var8, i8 %offset acq_rel
    ret i8 %old
@@ -925,6 +931,7 @@ define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-NEXT:    stxrh w11, w10, [x8]
 ; CHECK-NEXT:    cbnz w11, .LBB37_1
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
    %old = atomicrmw umax ptr @var16, i16 %offset monotonic
    ret i16 %old
@@ -971,6 +978,7 @@ define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
 define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8:
 ; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; INLINE_ATOMICS-NEXT:    and w8, w0, #0xff
 ; INLINE_ATOMICS-NEXT:    adrp x9, var8
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var8
@@ -984,9 +992,11 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 ; INLINE_ATOMICS-NEXT:    stxrb w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB40_1
 ; INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.end
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ; INLINE_ATOMICS-NEXT:  .LBB40_4: // %cmpxchg.nostore
 ; INLINE_ATOMICS-NEXT:    clrex
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i8:
@@ -1005,6 +1015,7 @@ define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
 define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; INLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16:
 ; INLINE_ATOMICS:       // %bb.0:
+; INLINE_ATOMICS-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; INLINE_ATOMICS-NEXT:    and w8, w0, #0xffff
 ; INLINE_ATOMICS-NEXT:    adrp x9, var16
 ; INLINE_ATOMICS-NEXT:    add x9, x9, :lo12:var16
@@ -1018,9 +1029,11 @@ define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
 ; INLINE_ATOMICS-NEXT:    stlxrh w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB41_1
 ; INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.end
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ; INLINE_ATOMICS-NEXT:  .LBB41_4: // %cmpxchg.nostore
 ; INLINE_ATOMICS-NEXT:    clrex
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i16:
@@ -1052,9 +1065,11 @@ define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
 ; INLINE_ATOMICS-NEXT:    stlxr w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT:    cbnz w10, .LBB42_1
 ; INLINE_ATOMICS-NEXT:  // %bb.3: // %cmpxchg.end
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ; INLINE_ATOMICS-NEXT:  .LBB42_4: // %cmpxchg.nostore
 ; INLINE_ATOMICS-NEXT:    clrex
+; INLINE_ATOMICS-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; INLINE_ATOMICS-NEXT:    ret
 ;
 ; OUTLINE_ATOMICS-LABEL: test_atomic_cmpxchg_i32:

diff  --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
index 7d1f901af1a15d..ed9c1b037d0cc7 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -18,6 +18,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
@@ -37,6 +38,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB0_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align2:
@@ -81,6 +83,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -103,6 +106,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB1_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
@@ -122,6 +126,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB1_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_f16_seq_cst_align4:
@@ -166,6 +171,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -176,6 +182,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -195,13 +202,15 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB2_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
@@ -222,6 +231,7 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB2_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
@@ -261,6 +271,7 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 2
@@ -270,6 +281,7 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -289,13 +301,15 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB3_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
@@ -316,6 +330,7 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB3_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
@@ -355,6 +370,7 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, bfloat %value seq_cst, align 4
@@ -428,6 +444,7 @@ define float @test_atomicrmw_fadd_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fadd ptr %ptr, float %value seq_cst, align 4
@@ -682,6 +699,7 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB7_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2f16_seq_cst_align4:
@@ -796,6 +814,7 @@ define <2 x bfloat> @test_atomicrmw_fadd_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB8_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_v2bf16_seq_cst_align4:
@@ -918,6 +937,7 @@ define <2 x float> @test_atomicrmw_fadd_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    bl __addsf3
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
 ; SOFTFP-NOLSE-NEXT:    mov w9, w22
+; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
 ; SOFTFP-NOLSE-NEXT:  .LBB9_3: // %cmpxchg.start
@@ -971,10 +991,10 @@ define <2 x double> @test_atomicrmw_fadd_v2f64_seq_cst_align8(ptr %ptr, <2 x dou
 ; LSE-NEXT:    fadd v2.2d, v1.2d, v0.2d
 ; LSE-NEXT:    mov x3, v1.d[1]
 ; LSE-NEXT:    fmov x2, d1
-; LSE-NEXT:    mov x6, x2
+; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    mov x5, v2.d[1]
+; LSE-NEXT:    mov x6, x2
 ; LSE-NEXT:    fmov x4, d2
-; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
 ; LSE-NEXT:    fmov d1, x6
 ; LSE-NEXT:    cmp x7, x3

diff  --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
index 2e60365903ef27..888b795876f7df 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
@@ -20,6 +20,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
@@ -39,6 +40,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB0_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align2:
@@ -83,6 +85,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -105,6 +108,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB1_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
@@ -124,6 +128,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB1_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_f16_seq_cst_align4:
@@ -168,6 +173,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -178,6 +184,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -197,13 +204,15 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB2_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
@@ -224,6 +233,7 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB2_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align2:
@@ -263,6 +273,7 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 2
@@ -272,6 +283,7 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -291,13 +303,15 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB3_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
@@ -318,6 +332,7 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB3_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_bf16_seq_cst_align4:
@@ -357,6 +372,7 @@ define bfloat @test_atomicrmw_fmax_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmax ptr %ptr, bfloat %value seq_cst, align 4
@@ -430,6 +446,7 @@ define float @test_atomicrmw_fmax_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmax ptr %ptr, float %value seq_cst, align 4
@@ -514,6 +531,7 @@ define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #
 define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half> %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NOLSE-NEXT:    mov h1, v0.h[1]
 ; NOLSE-NEXT:    fcvt s0, h0
 ; NOLSE-NEXT:    fcvt s1, h1
@@ -538,6 +556,7 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; LSE-NEXT:    mov h1, v0.h[1]
 ; LSE-NEXT:    fcvt s2, h0
 ; LSE-NEXT:    ldr s0, [x0]
@@ -560,6 +579,7 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB6_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2f16_seq_cst_align4:
@@ -632,6 +652,7 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bfloat> %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NOLSE-NEXT:    mov h1, v0.h[1]
 ; NOLSE-NEXT:    fmov w10, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
@@ -644,15 +665,16 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; NOLSE-NEXT:    ldaxr w9, [x0]
 ; NOLSE-NEXT:    fmov s2, w9
-; NOLSE-NEXT:    lsl w11, w9, #16
 ; NOLSE-NEXT:    mov h3, v2.h[1]
+; NOLSE-NEXT:    fmov w11, s2
+; NOLSE-NEXT:    lsl w11, w11, #16
 ; NOLSE-NEXT:    fmov w10, s3
 ; NOLSE-NEXT:    fmov s3, w11
 ; NOLSE-NEXT:    lsl w10, w10, #16
 ; NOLSE-NEXT:    fmaxnm s3, s3, s1
 ; NOLSE-NEXT:    fmov s2, w10
-; NOLSE-NEXT:    fmov w11, s3
 ; NOLSE-NEXT:    fmaxnm s2, s2, s0
+; NOLSE-NEXT:    fmov w11, s3
 ; NOLSE-NEXT:    ubfx w13, w11, #16, #1
 ; NOLSE-NEXT:    add w11, w11, w8
 ; NOLSE-NEXT:    fmov w10, s2
@@ -674,10 +696,11 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ;
 ; LSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; LSE-NEXT:    mov h1, v0.h[1]
 ; LSE-NEXT:    fmov w10, s0
-; LSE-NEXT:    ldr s0, [x0]
 ; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr s0, [x0]
 ; LSE-NEXT:    lsl w10, w10, #16
 ; LSE-NEXT:    fmov w9, s1
 ; LSE-NEXT:    fmov s2, w10
@@ -715,6 +738,7 @@ define <2 x bfloat> @test_atomicrmw_fmax_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; LSE-NEXT:    cmp w11, w9
 ; LSE-NEXT:    b.ne .LBB7_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmax_v2bf16_seq_cst_align4:
@@ -837,6 +861,7 @@ define <2 x float> @test_atomicrmw_fmax_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    bl fmaxf
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
 ; SOFTFP-NOLSE-NEXT:    mov w9, w22
+; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
 ; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
@@ -890,10 +915,10 @@ define <2 x double> @test_atomicrmw_fmax_v2f64_seq_cst_align8(ptr %ptr, <2 x dou
 ; LSE-NEXT:    fmaxnm v2.2d, v1.2d, v0.2d
 ; LSE-NEXT:    mov x3, v1.d[1]
 ; LSE-NEXT:    fmov x2, d1
-; LSE-NEXT:    mov x6, x2
+; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    mov x5, v2.d[1]
+; LSE-NEXT:    mov x6, x2
 ; LSE-NEXT:    fmov x4, d2
-; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
 ; LSE-NEXT:    fmov d1, x6
 ; LSE-NEXT:    cmp x7, x3

diff  --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
index c2b6fa0dcb7a43..a3665c6e428608 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
@@ -20,6 +20,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_f16_seq_cst_align2:
@@ -39,6 +40,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB0_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_f16_seq_cst_align2:
@@ -83,6 +85,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -105,6 +108,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB1_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_f16_seq_cst_align4:
@@ -124,6 +128,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB1_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_f16_seq_cst_align4:
@@ -168,6 +173,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -178,6 +184,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align2:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -197,13 +204,15 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB2_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align2:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
@@ -224,6 +233,7 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB2_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align2:
@@ -263,6 +273,7 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmin ptr %ptr, bfloat %value seq_cst, align 2
@@ -272,6 +283,7 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -291,13 +303,15 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB3_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
@@ -318,6 +332,7 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB3_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_bf16_seq_cst_align4:
@@ -357,6 +372,7 @@ define bfloat @test_atomicrmw_fmin_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmin ptr %ptr, bfloat %value seq_cst, align 4
@@ -430,6 +446,7 @@ define float @test_atomicrmw_fmin_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fmin ptr %ptr, float %value seq_cst, align 4
@@ -514,6 +531,7 @@ define double @test_atomicrmw_fmin_f32_seq_cst_align8(ptr %ptr, double %value) #
 define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half> %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmin_v2f16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NOLSE-NEXT:    mov h1, v0.h[1]
 ; NOLSE-NEXT:    fcvt s0, h0
 ; NOLSE-NEXT:    fcvt s1, h1
@@ -538,6 +556,7 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_v2f16_seq_cst_align4:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; LSE-NEXT:    mov h1, v0.h[1]
 ; LSE-NEXT:    fcvt s2, h0
 ; LSE-NEXT:    ldr s0, [x0]
@@ -560,6 +579,7 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB6_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_v2f16_seq_cst_align4:
@@ -632,6 +652,7 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bfloat> %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fmin_v2bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NOLSE-NEXT:    mov h1, v0.h[1]
 ; NOLSE-NEXT:    fmov w10, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
@@ -644,15 +665,16 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; NOLSE-NEXT:    ldaxr w9, [x0]
 ; NOLSE-NEXT:    fmov s2, w9
-; NOLSE-NEXT:    lsl w11, w9, #16
 ; NOLSE-NEXT:    mov h3, v2.h[1]
+; NOLSE-NEXT:    fmov w11, s2
+; NOLSE-NEXT:    lsl w11, w11, #16
 ; NOLSE-NEXT:    fmov w10, s3
 ; NOLSE-NEXT:    fmov s3, w11
 ; NOLSE-NEXT:    lsl w10, w10, #16
 ; NOLSE-NEXT:    fminnm s3, s3, s1
 ; NOLSE-NEXT:    fmov s2, w10
-; NOLSE-NEXT:    fmov w11, s3
 ; NOLSE-NEXT:    fminnm s2, s2, s0
+; NOLSE-NEXT:    fmov w11, s3
 ; NOLSE-NEXT:    ubfx w13, w11, #16, #1
 ; NOLSE-NEXT:    add w11, w11, w8
 ; NOLSE-NEXT:    fmov w10, s2
@@ -674,10 +696,11 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ;
 ; LSE-LABEL: test_atomicrmw_fmin_v2bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; LSE-NEXT:    mov h1, v0.h[1]
 ; LSE-NEXT:    fmov w10, s0
-; LSE-NEXT:    ldr s0, [x0]
 ; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr s0, [x0]
 ; LSE-NEXT:    lsl w10, w10, #16
 ; LSE-NEXT:    fmov w9, s1
 ; LSE-NEXT:    fmov s2, w10
@@ -715,6 +738,7 @@ define <2 x bfloat> @test_atomicrmw_fmin_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; LSE-NEXT:    cmp w11, w9
 ; LSE-NEXT:    b.ne .LBB7_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fmin_v2bf16_seq_cst_align4:
@@ -837,6 +861,7 @@ define <2 x float> @test_atomicrmw_fmin_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    bl fminf
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
 ; SOFTFP-NOLSE-NEXT:    mov w9, w22
+; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
 ; SOFTFP-NOLSE-NEXT:  .LBB8_3: // %cmpxchg.start
@@ -890,10 +915,10 @@ define <2 x double> @test_atomicrmw_fmin_v2f64_seq_cst_align8(ptr %ptr, <2 x dou
 ; LSE-NEXT:    fminnm v2.2d, v1.2d, v0.2d
 ; LSE-NEXT:    mov x3, v1.d[1]
 ; LSE-NEXT:    fmov x2, d1
-; LSE-NEXT:    mov x6, x2
+; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    mov x5, v2.d[1]
+; LSE-NEXT:    mov x6, x2
 ; LSE-NEXT:    fmov x4, d2
-; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
 ; LSE-NEXT:    fmov d1, x6
 ; LSE-NEXT:    cmp x7, x3

diff  --git a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
index 3a55b54b16c8d9..7725ce0e731859 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
@@ -18,6 +18,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fsub_f16_seq_cst_align2:
@@ -37,6 +38,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB0_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_f16_seq_cst_align2:
@@ -81,6 +83,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB0_2
 ; SOFTFP-NOLSE-NEXT:  .LBB0_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -103,6 +106,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; NOLSE-NEXT:    stlxrh w9, w8, [x0]
 ; NOLSE-NEXT:    cbnz w9, .LBB1_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fsub_f16_seq_cst_align4:
@@ -122,6 +126,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; LSE-NEXT:    cmp w10, w8, uxth
 ; LSE-NEXT:    b.ne .LBB1_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_f16_seq_cst_align4:
@@ -166,6 +171,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB1_2
 ; SOFTFP-NOLSE-NEXT:  .LBB1_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x22, x21, [sp, #16] // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ldr x30, [sp], #48 // 8-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
@@ -176,6 +182,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
 define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align2:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -195,13 +202,15 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB2_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align2:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB2_1: // %atomicrmw.start
@@ -222,6 +231,7 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB2_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align2:
@@ -261,6 +271,7 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB2_2
 ; SOFTFP-NOLSE-NEXT:  .LBB2_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fsub ptr %ptr, bfloat %value seq_cst, align 2
@@ -270,6 +281,7 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
 define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value) #0 {
 ; NOLSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align4:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w9, s0
 ; NOLSE-NEXT:    mov w8, #32767 // =0x7fff
 ; NOLSE-NEXT:    lsl w9, w9, #16
@@ -289,13 +301,15 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; NOLSE-NEXT:    stlxrh w10, w9, [x0]
 ; NOLSE-NEXT:    cbnz w10, .LBB3_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align4:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w9, s0
-; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    mov w8, #32767 // =0x7fff
+; LSE-NEXT:    ldr h0, [x0]
 ; LSE-NEXT:    lsl w9, w9, #16
 ; LSE-NEXT:    fmov s1, w9
 ; LSE-NEXT:  .LBB3_1: // %atomicrmw.start
@@ -316,6 +330,7 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; LSE-NEXT:    cmp w11, w10, uxth
 ; LSE-NEXT:    b.ne .LBB3_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_bf16_seq_cst_align4:
@@ -355,6 +370,7 @@ define bfloat @test_atomicrmw_fsub_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB3_2
 ; SOFTFP-NOLSE-NEXT:  .LBB3_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fsub ptr %ptr, bfloat %value seq_cst, align 4
@@ -428,6 +444,7 @@ define float @test_atomicrmw_fsub_f32_seq_cst_align4(ptr %ptr, float %value) #0
 ; SOFTFP-NOLSE-NEXT:    cbz w8, .LBB4_2
 ; SOFTFP-NOLSE-NEXT:  .LBB4_6: // %atomicrmw.end
 ; SOFTFP-NOLSE-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; SOFTFP-NOLSE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; SOFTFP-NOLSE-NEXT:    ldp x30, x21, [sp], #32 // 16-byte Folded Reload
 ; SOFTFP-NOLSE-NEXT:    ret
   %res = atomicrmw fsub ptr %ptr, float %value seq_cst, align 4
@@ -682,6 +699,7 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB7_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_v2f16_seq_cst_align4:
@@ -796,6 +814,7 @@ define <2 x bfloat> @test_atomicrmw_fsub_v2bf16_seq_cst_align4(ptr %ptr, <2 x bf
 ; LSE-NEXT:    cmp w10, w8
 ; LSE-NEXT:    b.ne .LBB8_1
 ; LSE-NEXT:  // %bb.2: // %atomicrmw.end
+; LSE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; LSE-NEXT:    ret
 ;
 ; SOFTFP-NOLSE-LABEL: test_atomicrmw_fsub_v2bf16_seq_cst_align4:
@@ -918,6 +937,7 @@ define <2 x float> @test_atomicrmw_fsub_v2f32_seq_cst_align8(ptr %ptr, <2 x floa
 ; SOFTFP-NOLSE-NEXT:    bl __subsf3
 ; SOFTFP-NOLSE-NEXT:    mov w8, w0
 ; SOFTFP-NOLSE-NEXT:    mov w9, w22
+; SOFTFP-NOLSE-NEXT:    // kill: def $w23 killed $w23 killed $x23 def $x23
 ; SOFTFP-NOLSE-NEXT:    orr x8, x8, x24, lsl #32
 ; SOFTFP-NOLSE-NEXT:    orr x9, x9, x23, lsl #32
 ; SOFTFP-NOLSE-NEXT:  .LBB9_3: // %cmpxchg.start
@@ -971,10 +991,10 @@ define <2 x double> @test_atomicrmw_fsub_v2f64_seq_cst_align8(ptr %ptr, <2 x dou
 ; LSE-NEXT:    fsub v2.2d, v1.2d, v0.2d
 ; LSE-NEXT:    mov x3, v1.d[1]
 ; LSE-NEXT:    fmov x2, d1
-; LSE-NEXT:    mov x6, x2
+; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    mov x5, v2.d[1]
+; LSE-NEXT:    mov x6, x2
 ; LSE-NEXT:    fmov x4, d2
-; LSE-NEXT:    mov x7, x3
 ; LSE-NEXT:    caspal x6, x7, x4, x5, [x0]
 ; LSE-NEXT:    fmov d1, x6
 ; LSE-NEXT:    cmp x7, x3

diff  --git a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
index 597b58b6979d74..98033a8e449ffb 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll
@@ -1,10 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE
 
 define half @test_rmw_xchg_f16(ptr %dst, half %new) {
 ; NOLSE-LABEL: test_rmw_xchg_f16:
 ; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; NOLSE-NEXT:    fmov w8, s0
 ; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
 ; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
@@ -13,13 +14,16 @@ define half @test_rmw_xchg_f16(ptr %dst, half %new) {
 ; NOLSE-NEXT:    cbnz w10, .LBB0_1
 ; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
 ; NOLSE-NEXT:    fmov s0, w9
+; NOLSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; NOLSE-NEXT:    ret
 ;
 ; LSE-LABEL: test_rmw_xchg_f16:
 ; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; LSE-NEXT:    fmov w8, s0
 ; LSE-NEXT:    swpalh w8, w8, [x0]
 ; LSE-NEXT:    fmov s0, w8
+; LSE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; LSE-NEXT:    ret
   %res = atomicrmw xchg ptr %dst, half %new seq_cst
   ret half %res
@@ -99,8 +103,8 @@ define fp128 @test_rmw_xchg_f128(ptr %dst, fp128 %new) {
 ; LSE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; LSE-NEXT:    mov x7, x5
 ; LSE-NEXT:    mov x6, x4
-; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    mov x5, x7
+; LSE-NEXT:    mov x4, x6
 ; LSE-NEXT:    caspal x4, x5, x2, x3, [x0]
 ; LSE-NEXT:    cmp x5, x7
 ; LSE-NEXT:    ccmp x4, x6, #0, eq

diff  --git a/llvm/test/CodeGen/AArch64/bf16-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-instructions.ll
index 40d0d7cbc923bb..33997614598c3a 100644
--- a/llvm/test/CodeGen/AArch64/bf16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-instructions.ll
@@ -5,6 +5,8 @@
 define bfloat @test_fadd(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_fadd:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -19,10 +21,13 @@ define bfloat @test_fadd(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fadd:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -39,6 +44,8 @@ define bfloat @test_fadd(bfloat %a, bfloat %b) #0 {
 define bfloat @test_fsub(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_fsub:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -53,10 +60,13 @@ define bfloat @test_fsub(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fsub:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -73,6 +83,8 @@ define bfloat @test_fsub(bfloat %a, bfloat %b) #0 {
 define bfloat @test_fmul(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_fmul:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -87,10 +99,13 @@ define bfloat @test_fmul(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fmul:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -107,9 +122,12 @@ define bfloat @test_fmul(bfloat %a, bfloat %b) #0 {
 define bfloat @test_fmadd(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-LABEL: test_fmadd:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s1
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w10, #32767 // =0x7fff
+; CHECK-CVT-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -132,12 +150,16 @@ define bfloat @test_fmadd(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-NEXT:    add w8, w9, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fmadd:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
+; CHECK-BF16-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    lsl w9, w9, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -161,6 +183,8 @@ define bfloat @test_fmadd(bfloat %a, bfloat %b, bfloat %c) #0 {
 define bfloat @test_fdiv(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_fdiv:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -175,10 +199,13 @@ define bfloat @test_fdiv(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fdiv:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -196,6 +223,8 @@ define bfloat @test_frem(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_frem:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
@@ -210,12 +239,15 @@ define bfloat @test_frem(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_frem:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    fmov w9, s1
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -289,8 +321,11 @@ define bfloat @test_tailcall_flipped(bfloat %a, bfloat %b) #0 {
 define bfloat @test_select(bfloat %a, bfloat %b, i1 zeroext %c) #0 {
 ; CHECK-LABEL: test_select:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %r = select i1 %c, bfloat %a, bfloat %b
   ret bfloat %r
@@ -299,14 +334,19 @@ define bfloat @test_select(bfloat %a, bfloat %b, i1 zeroext %c) #0 {
 define bfloat @test_select_cc(bfloat %a, bfloat %b, bfloat %c, bfloat %d) #0 {
 ; CHECK-LABEL: test_select_cc:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h3 killed $h3 def $s3
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-NEXT:    fmov w8, s3
 ; CHECK-NEXT:    fmov w9, s2
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    lsl w9, w9, #16
 ; CHECK-NEXT:    fmov s2, w8
 ; CHECK-NEXT:    fmov s3, w9
 ; CHECK-NEXT:    fcmp s3, s2
 ; CHECK-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %cc = fcmp une bfloat %c, %d
   %r = select i1 %cc, bfloat %a, bfloat %b
@@ -316,6 +356,8 @@ define bfloat @test_select_cc(bfloat %a, bfloat %b, bfloat %c, bfloat %d) #0 {
 define float @test_select_cc_f32_f16(float %a, float %b, bfloat %c, bfloat %d) #0 {
 ; CHECK-LABEL: test_select_cc_f32_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h3 killed $h3 def $s3
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-NEXT:    fmov w8, s3
 ; CHECK-NEXT:    fmov w9, s2
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -334,7 +376,10 @@ define bfloat @test_select_cc_f16_f32(bfloat %a, bfloat %b, float %c, float %d)
 ; CHECK-LABEL: test_select_cc_f16_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcmp s2, s3
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %cc = fcmp une float %c, %d
   %r = select i1 %cc, bfloat %a, bfloat %b
@@ -344,6 +389,8 @@ define bfloat @test_select_cc_f16_f32(bfloat %a, bfloat %b, float %c, float %d)
 define i1 @test_fcmp_une(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_une:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -360,6 +407,8 @@ define i1 @test_fcmp_une(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ueq(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ueq:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -377,6 +426,8 @@ define i1 @test_fcmp_ueq(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ugt(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ugt:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -393,6 +444,8 @@ define i1 @test_fcmp_ugt(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_uge(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_uge:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -409,6 +462,8 @@ define i1 @test_fcmp_uge(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ult(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ult:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -425,6 +480,8 @@ define i1 @test_fcmp_ult(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ule(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ule:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -441,6 +498,8 @@ define i1 @test_fcmp_ule(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_uno(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_uno:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -457,6 +516,8 @@ define i1 @test_fcmp_uno(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_one(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_one:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -474,6 +535,8 @@ define i1 @test_fcmp_one(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_oeq(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_oeq:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -490,6 +553,8 @@ define i1 @test_fcmp_oeq(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ogt(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ogt:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -506,6 +571,8 @@ define i1 @test_fcmp_ogt(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_oge(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_oge:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -522,6 +589,8 @@ define i1 @test_fcmp_oge(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_olt(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_olt:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -538,6 +607,8 @@ define i1 @test_fcmp_olt(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ole(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ole:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -554,6 +625,8 @@ define i1 @test_fcmp_ole(bfloat %a, bfloat %b) #0 {
 define i1 @test_fcmp_ord(bfloat %a, bfloat %b) #0 {
 ; CHECK-LABEL: test_fcmp_ord:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -570,6 +643,7 @@ define i1 @test_fcmp_ord(bfloat %a, bfloat %b) #0 {
 define void @test_fccmp(bfloat %in, ptr %out) {
 ; CHECK-LABEL: test_fccmp:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    movi v1.2s, #69, lsl #24
 ; CHECK-NEXT:    movi v3.2s, #72, lsl #24
@@ -593,6 +667,8 @@ define void @test_fccmp(bfloat %in, ptr %out) {
 define void @test_br_cc(bfloat %a, bfloat %b, ptr %p1, ptr %p2) #0 {
 ; CHECK-LABEL: test_br_cc:
 ; CHECK:       // %bb.0: // %common.ret
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
@@ -649,6 +725,7 @@ declare i1 @test_dummy(ptr %p1) #0
 define i32 @test_fptosi_i32(bfloat %a) #0 {
 ; CHECK-LABEL: test_fptosi_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -661,6 +738,7 @@ define i32 @test_fptosi_i32(bfloat %a) #0 {
 define i64 @test_fptosi_i64(bfloat %a) #0 {
 ; CHECK-LABEL: test_fptosi_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -673,6 +751,7 @@ define i64 @test_fptosi_i64(bfloat %a) #0 {
 define i32 @test_fptoui_i32(bfloat %a) #0 {
 ; CHECK-LABEL: test_fptoui_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -685,6 +764,7 @@ define i32 @test_fptoui_i32(bfloat %a) #0 {
 define i64 @test_fptoui_i64(bfloat %a) #0 {
 ; CHECK-LABEL: test_fptoui_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -706,6 +786,7 @@ define bfloat @test_uitofp_i32(i32 %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_uitofp_i32:
@@ -740,6 +821,7 @@ define bfloat @test_uitofp_i64(i64 %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_uitofp_i64:
@@ -774,6 +856,7 @@ define bfloat @test_sitofp_i32(i32 %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sitofp_i32:
@@ -812,6 +895,7 @@ define bfloat @test_sitofp_i64(i64 %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sitofp_i64:
@@ -843,6 +927,7 @@ define bfloat @test_uitofp_i32_fadd(i32 %a, bfloat %b) #0 {
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    ucvtf d1, w0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fcvtxn s1, d1
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    ubfx w10, w9, #16, #1
@@ -863,11 +948,13 @@ define bfloat @test_uitofp_i32_fadd(i32 %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_uitofp_i32_fadd:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    ucvtf d1, w0
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fcvtxn s1, d1
@@ -889,6 +976,7 @@ define bfloat @test_sitofp_i32_fadd(i32 %a, bfloat %b) #0 {
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    scvtf d1, w0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fcvtxn s1, d1
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    ubfx w10, w9, #16, #1
@@ -909,11 +997,13 @@ define bfloat @test_sitofp_i32_fadd(i32 %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sitofp_i32_fadd:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    scvtf d1, w0
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fcvtxn s1, d1
@@ -943,6 +1033,7 @@ define bfloat @test_fptrunc_float(float %a) #0 {
 ; CHECK-CVT-NEXT:    csel w8, w9, w8, vs
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fptrunc_float:
@@ -964,6 +1055,7 @@ define bfloat @test_fptrunc_double(double %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fptrunc_double:
@@ -978,6 +1070,7 @@ define bfloat @test_fptrunc_double(double %a) #0 {
 define float @test_fpext_float(bfloat %a) #0 {
 ; CHECK-LABEL: test_fpext_float:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -989,6 +1082,7 @@ define float @test_fpext_float(bfloat %a) #0 {
 define double @test_fpext_double(bfloat %a) #0 {
 ; CHECK-LABEL: test_fpext_double:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    lsl w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
@@ -1001,6 +1095,7 @@ define double @test_fpext_double(bfloat %a) #0 {
 define i16 @test_bitcast_bfloattoi16(bfloat %a) #0 {
 ; CHECK-LABEL: test_bitcast_bfloattoi16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %r = bitcast bfloat %a to i16
@@ -1011,6 +1106,7 @@ define bfloat @test_bitcast_i16tobfloat(i16 %a) #0 {
 ; CHECK-LABEL: test_bitcast_i16tobfloat:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %r = bitcast i16 %a to bfloat
   ret bfloat %r
@@ -1052,6 +1148,7 @@ declare bfloat @llvm.fmuladd.f16(bfloat %a, bfloat %b, bfloat %c) #0
 define bfloat @test_sqrt(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_sqrt:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -1063,10 +1160,12 @@ define bfloat @test_sqrt(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sqrt:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1081,6 +1180,7 @@ define bfloat @test_powi(bfloat %a, i32 %b) #0 {
 ; CHECK-CVT-LABEL: test_powi:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1092,12 +1192,14 @@ define bfloat @test_powi(bfloat %a, i32 %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_powi:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1114,6 +1216,7 @@ define bfloat @test_sin(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_sin:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1125,12 +1228,14 @@ define bfloat @test_sin(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sin:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1146,6 +1251,7 @@ define bfloat @test_cos(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_cos:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1157,12 +1263,14 @@ define bfloat @test_cos(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_cos:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1178,6 +1286,7 @@ define bfloat @test_tan(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_tan:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1189,12 +1298,14 @@ define bfloat @test_tan(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_tan:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1210,6 +1321,7 @@ define bfloat @test_acos(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_acos:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1221,12 +1333,14 @@ define bfloat @test_acos(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_acos:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1242,6 +1356,7 @@ define bfloat @test_asin(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_asin:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1253,12 +1368,14 @@ define bfloat @test_asin(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_asin:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1274,6 +1391,7 @@ define bfloat @test_atan(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_atan:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1285,12 +1403,14 @@ define bfloat @test_atan(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_atan:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1306,6 +1426,8 @@ define bfloat @test_atan2(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_atan2:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
@@ -1320,12 +1442,15 @@ define bfloat @test_atan2(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_atan2:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    fmov w9, s1
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -1344,6 +1469,7 @@ define bfloat @test_cosh(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_cosh:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1355,12 +1481,14 @@ define bfloat @test_cosh(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_cosh:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1376,6 +1504,7 @@ define bfloat @test_sinh(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_sinh:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1387,12 +1516,14 @@ define bfloat @test_sinh(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_sinh:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1408,6 +1539,7 @@ define bfloat @test_tanh(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_tanh:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1419,12 +1551,14 @@ define bfloat @test_tanh(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_tanh:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1440,6 +1574,8 @@ define bfloat @test_pow(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_pow:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
@@ -1454,12 +1590,15 @@ define bfloat @test_pow(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_pow:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    fmov w9, s1
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -1478,6 +1617,7 @@ define bfloat @test_exp(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_exp:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1489,12 +1629,14 @@ define bfloat @test_exp(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_exp:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1510,6 +1652,7 @@ define bfloat @test_exp2(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_exp2:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1521,12 +1664,14 @@ define bfloat @test_exp2(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_exp2:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1542,6 +1687,7 @@ define bfloat @test_log(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_log:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1553,12 +1699,14 @@ define bfloat @test_log(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_log:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1574,6 +1722,7 @@ define bfloat @test_log10(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_log10:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1585,12 +1734,14 @@ define bfloat @test_log10(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_log10:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1606,6 +1757,7 @@ define bfloat @test_log2(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_log2:
 ; CHECK-CVT:       // %bb.0:
 ; CHECK-CVT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -1617,12 +1769,14 @@ define bfloat @test_log2(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_log2:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1637,6 +1791,9 @@ define bfloat @test_log2(bfloat %a) #0 {
 define bfloat @test_fma(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-LABEL: test_fma:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h2 killed $h2 def $s2
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s2
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
@@ -1654,10 +1811,14 @@ define bfloat @test_fma(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-NEXT:    add w8, w9, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fma:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h2 killed $h2 def $s2
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s2
 ; CHECK-BF16-NEXT:    fmov w9, s1
 ; CHECK-BF16-NEXT:    fmov w10, s0
@@ -1677,9 +1838,11 @@ define bfloat @test_fma(bfloat %a, bfloat %b, bfloat %c) #0 {
 define bfloat @test_fabs(bfloat %a) #0 {
 ; CHECK-LABEL: test_fabs:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w8, w8, #0x7fff
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %r = call bfloat @llvm.fabs.f16(bfloat %a)
   ret bfloat %r
@@ -1688,6 +1851,8 @@ define bfloat @test_fabs(bfloat %a) #0 {
 define bfloat @test_minnum(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_minnum:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -1702,10 +1867,13 @@ define bfloat @test_minnum(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_minnum:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -1722,6 +1890,8 @@ define bfloat @test_minnum(bfloat %a, bfloat %b) #0 {
 define bfloat @test_maxnum(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_maxnum:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s1
 ; CHECK-CVT-NEXT:    fmov w10, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
@@ -1736,10 +1906,13 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_maxnum:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
@@ -1756,6 +1929,8 @@ define bfloat @test_maxnum(bfloat %a, bfloat %b) #0 {
 define bfloat @test_copysign(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_copysign:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s1
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mvni v2.4s, #128, lsl #24
@@ -1767,10 +1942,13 @@ define bfloat @test_copysign(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_copysign:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    mvni v2.4s, #128, lsl #24
@@ -1788,20 +1966,25 @@ define bfloat @test_copysign(bfloat %a, bfloat %b) #0 {
 define bfloat @test_copysign_f32(bfloat %a, float %b) #0 {
 ; CHECK-CVT-LABEL: test_copysign_f32:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    mvni v2.4s, #128, lsl #24
+; CHECK-CVT-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
 ; CHECK-CVT-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_copysign_f32:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    mvni v2.4s, #128, lsl #24
+; CHECK-BF16-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
 ; CHECK-BF16-NEXT:    bif v0.16b, v1.16b, v2.16b
@@ -1815,6 +1998,7 @@ define bfloat @test_copysign_f32(bfloat %a, float %b) #0 {
 define bfloat @test_copysign_f64(bfloat %a, double %b) #0 {
 ; CHECK-CVT-LABEL: test_copysign_f64:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    fcvt s1, d1
 ; CHECK-CVT-NEXT:    mvni v2.4s, #128, lsl #24
@@ -1824,10 +2008,12 @@ define bfloat @test_copysign_f64(bfloat %a, double %b) #0 {
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_copysign_f64:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    fcvt s1, d1
 ; CHECK-BF16-NEXT:    mvni v2.4s, #128, lsl #24
@@ -1846,6 +2032,8 @@ define bfloat @test_copysign_f64(bfloat %a, double %b) #0 {
 define float @test_copysign_extended(bfloat %a, bfloat %b) #0 {
 ; CHECK-CVT-LABEL: test_copysign_extended:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s1
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mvni v2.4s, #128, lsl #24
@@ -1862,6 +2050,8 @@ define float @test_copysign_extended(bfloat %a, bfloat %b) #0 {
 ;
 ; CHECK-BF16-LABEL: test_copysign_extended:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
 ; CHECK-BF16-NEXT:    mvni v2.4s, #128, lsl #24
@@ -1870,6 +2060,7 @@ define float @test_copysign_extended(bfloat %a, bfloat %b) #0 {
 ; CHECK-BF16-NEXT:    fmov s0, w8
 ; CHECK-BF16-NEXT:    fmov s1, w9
 ; CHECK-BF16-NEXT:    bit v0.16b, v1.16b, v2.16b
+; CHECK-BF16-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %r = call bfloat @llvm.copysign.f16(bfloat %a, bfloat %b)
   %xr = fpext bfloat %r to float
@@ -1879,6 +2070,7 @@ define float @test_copysign_extended(bfloat %a, bfloat %b) #0 {
 define bfloat @test_floor(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_floor:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -1890,10 +2082,12 @@ define bfloat @test_floor(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_floor:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1907,6 +2101,7 @@ define bfloat @test_floor(bfloat %a) #0 {
 define bfloat @test_ceil(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_ceil:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -1918,10 +2113,12 @@ define bfloat @test_ceil(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_ceil:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1935,6 +2132,7 @@ define bfloat @test_ceil(bfloat %a) #0 {
 define bfloat @test_trunc(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_trunc:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -1946,10 +2144,12 @@ define bfloat @test_trunc(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_trunc:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1963,6 +2163,7 @@ define bfloat @test_trunc(bfloat %a) #0 {
 define bfloat @test_rint(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_rint:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -1974,10 +2175,12 @@ define bfloat @test_rint(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_rint:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -1991,6 +2194,7 @@ define bfloat @test_rint(bfloat %a) #0 {
 define bfloat @test_nearbyint(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_nearbyint:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -2002,10 +2206,12 @@ define bfloat @test_nearbyint(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_nearbyint:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -2019,6 +2225,7 @@ define bfloat @test_nearbyint(bfloat %a) #0 {
 define bfloat @test_round(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_round:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -2030,10 +2237,12 @@ define bfloat @test_round(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_round:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -2047,6 +2256,7 @@ define bfloat @test_round(bfloat %a) #0 {
 define bfloat @test_roundeven(bfloat %a) #0 {
 ; CHECK-CVT-LABEL: test_roundeven:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
@@ -2058,10 +2268,12 @@ define bfloat @test_roundeven(bfloat %a) #0 {
 ; CHECK-CVT-NEXT:    add w8, w10, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_roundeven:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s0
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8
@@ -2075,9 +2287,12 @@ define bfloat @test_roundeven(bfloat %a) #0 {
 define bfloat @test_fmuladd(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-LABEL: test_fmuladd:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s1
 ; CHECK-CVT-NEXT:    fmov w9, s0
 ; CHECK-CVT-NEXT:    mov w10, #32767 // =0x7fff
+; CHECK-CVT-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-CVT-NEXT:    lsl w8, w8, #16
 ; CHECK-CVT-NEXT:    lsl w9, w9, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
@@ -2100,12 +2315,16 @@ define bfloat @test_fmuladd(bfloat %a, bfloat %b, bfloat %c) #0 {
 ; CHECK-CVT-NEXT:    add w8, w9, w8
 ; CHECK-CVT-NEXT:    lsr w8, w8, #16
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-BF16-LABEL: test_fmuladd:
 ; CHECK-BF16:       // %bb.0:
+; CHECK-BF16-NEXT:    // kill: def $h1 killed $h1 def $s1
+; CHECK-BF16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BF16-NEXT:    fmov w8, s1
 ; CHECK-BF16-NEXT:    fmov w9, s0
+; CHECK-BF16-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-BF16-NEXT:    lsl w8, w8, #16
 ; CHECK-BF16-NEXT:    lsl w9, w9, #16
 ; CHECK-BF16-NEXT:    fmov s0, w8

diff  --git a/llvm/test/CodeGen/AArch64/bf16-select.ll b/llvm/test/CodeGen/AArch64/bf16-select.ll
index 17bb8446545fdb..e3479f49e86b61 100644
--- a/llvm/test/CodeGen/AArch64/bf16-select.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-select.ll
@@ -7,8 +7,11 @@
 define bfloat @test_select(bfloat %a, bfloat %b, i1 zeroext %c) {
 ; CHECK-BASE-LABEL: test_select:
 ; CHECK-BASE:       // %bb.0:
+; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BASE-NEXT:    cmp w0, #0
+; CHECK-BASE-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-BASE-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-BASE-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_select:
@@ -24,7 +27,10 @@ define bfloat @test_select_fcc(bfloat %a, bfloat %b, float %c, float %d) {
 ; CHECK-BASE-LABEL: test_select_fcc:
 ; CHECK-BASE:       // %bb.0:
 ; CHECK-BASE-NEXT:    fcmp s2, s3
+; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-BASE-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-BASE-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-BASE-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_select_fcc:
@@ -40,8 +46,11 @@ define bfloat @test_select_fcc(bfloat %a, bfloat %b, float %c, float %d) {
 define bfloat @test_select_icc(bfloat %a, bfloat %b, i32 %c, i32 %d) {
 ; CHECK-BASE-LABEL: test_select_icc:
 ; CHECK-BASE:       // %bb.0:
+; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-BASE-NEXT:    cmp w0, w1
+; CHECK-BASE-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-BASE-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-BASE-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-BASE-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_select_icc:

diff  --git a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
index 408cef22b91dd4..d59de3c56f4ee2 100644
--- a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
@@ -99,6 +99,7 @@ entry:
 define dso_local <4 x bfloat> @test_vmov_n_bf16(float %a.coerce) {
 ; CHECK-LABEL: test_vmov_n_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -113,6 +114,7 @@ entry:
 define dso_local <8 x bfloat> @test_vmovq_n_bf16(float %a.coerce) {
 ; CHECK-LABEL: test_vmovq_n_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -127,6 +129,7 @@ entry:
 define dso_local <4 x bfloat> @test_vdup_n_bf16(float %a.coerce) {
 ; CHECK-LABEL: test_vdup_n_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -141,6 +144,7 @@ entry:
 define dso_local <8 x bfloat> @test_vdupq_n_bf16(float %a.coerce) {
 ; CHECK-LABEL: test_vdupq_n_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -155,6 +159,7 @@ entry:
 define dso_local <4 x bfloat> @test_vdup_lane_bf16(<4 x bfloat> %a) {
 ; CHECK-LABEL: test_vdup_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -165,6 +170,7 @@ entry:
 define dso_local <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %a) {
 ; CHECK-LABEL: test_vdupq_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -196,6 +202,7 @@ define dso_local <4 x bfloat> @test_vext_aligned_bf16(<8 x bfloat> %a) {
 ; CHECK-LABEL: test_vext_aligned_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -206,6 +213,7 @@ define dso_local <4 x bfloat> @test_vext_unaligned_bf16(<8 x bfloat> %a) {
 ; CHECK-LABEL: test_vext_unaligned_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #6
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vext = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
@@ -216,11 +224,14 @@ define <8 x bfloat> @shuffle3step0_bf16(<32 x bfloat> %src) {
 ; CHECK-LABEL: shuffle3step0_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI16_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    mov v3.16b, v2.16b
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    adrp x8, .LCPI16_1
-; CHECK-NEXT:    tbl v1.16b, { v0.16b, v1.16b }, v3.16b
+; CHECK-NEXT:    tbl v2.16b, { v0.16b, v1.16b }, v4.16b
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI16_1]
-; CHECK-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-NEXT:    ret
 entry:
   %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
@@ -231,11 +242,14 @@ define <8 x bfloat> @shuffle3step1_bf16(<32 x bfloat> %src) {
 ; CHECK-LABEL: shuffle3step1_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI17_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    mov v3.16b, v2.16b
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    adrp x8, .LCPI17_1
-; CHECK-NEXT:    tbl v1.16b, { v0.16b, v1.16b }, v3.16b
+; CHECK-NEXT:    tbl v2.16b, { v0.16b, v1.16b }, v4.16b
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI17_1]
-; CHECK-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-NEXT:    ret
 entry:
   %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
@@ -246,11 +260,14 @@ define <8 x bfloat> @shuffle3step2_bf16(<32 x bfloat> %src) {
 ; CHECK-LABEL: shuffle3step2_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI18_0
-; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    mov v3.16b, v2.16b
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    adrp x8, .LCPI18_1
-; CHECK-NEXT:    tbl v1.16b, { v0.16b, v1.16b }, v3.16b
+; CHECK-NEXT:    tbl v2.16b, { v0.16b, v1.16b }, v4.16b
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI18_1]
-; CHECK-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-NEXT:    ret
 entry:
   %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
@@ -327,6 +344,7 @@ entry:
 define <8 x bfloat> @test_shufflevector8xbfloat(<4 x bfloat> %a) {
 ; CHECK-LABEL: test_shufflevector8xbfloat:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.d[1], v0.d[0]
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
index 762def573cd175..9b6e19eba3f4e6 100644
--- a/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-v4-instructions.ll
@@ -22,6 +22,7 @@ define <4 x bfloat> @add_h(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
 ; CHECK-BF16-NEXT:    fadd v0.4s, v0.4s, v1.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
 entry:
 
@@ -61,6 +62,7 @@ define <4 x bfloat> @sub_h(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
 ; CHECK-BF16-NEXT:    fsub v0.4s, v0.4s, v1.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
 entry:
 
@@ -89,6 +91,7 @@ define <4 x bfloat> @mul_h(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
 ; CHECK-BF16-NEXT:    fmul v0.4s, v0.4s, v1.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
 entry:
 
@@ -117,6 +120,7 @@ define <4 x bfloat> @div_h(<4 x bfloat> %a, <4 x bfloat> %b) {
 ; CHECK-BF16-NEXT:    shll v0.4s, v0.4h, #16
 ; CHECK-BF16-NEXT:    fdiv v0.4s, v0.4s, v1.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
 entry:
 
@@ -164,6 +168,7 @@ define <4 x bfloat> @s_to_h(<4 x float> %a) {
 ; CHECK-BF16-LABEL: s_to_h:
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = fptrunc <4 x float> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -191,6 +196,7 @@ define <4 x bfloat> @d_to_h(<4 x double> %a) {
 ; CHECK-BF16-NEXT:    fcvtxn v0.2s, v0.2d
 ; CHECK-BF16-NEXT:    fcvtxn2 v0.4s, v1.2d
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = fptrunc <4 x double> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -256,6 +262,7 @@ define <4 x bfloat> @sitofp_i8(<4 x i8> %a) #0 {
 ; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = sitofp <4 x i8> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -279,6 +286,7 @@ define <4 x bfloat> @sitofp_i16(<4 x i16> %a) #0 {
 ; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = sitofp <4 x i16> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -301,6 +309,7 @@ define <4 x bfloat> @sitofp_i32(<4 x i32> %a) #0 {
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = sitofp <4 x i32> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -333,6 +342,7 @@ define <4 x bfloat> @sitofp_i64(<4 x i64> %a) #0 {
 ; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = sitofp <4 x i64> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -358,6 +368,7 @@ define <4 x bfloat> @uitofp_i8(<4 x i8> %a) #0 {
 ; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = uitofp <4 x i8> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -382,6 +393,7 @@ define <4 x bfloat> @uitofp_i16(<4 x i16> %a) #0 {
 ; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = uitofp <4 x i16> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -404,6 +416,7 @@ define <4 x bfloat> @uitofp_i32(<4 x i32> %a) #0 {
 ; CHECK-BF16:       // %bb.0:
 ; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = uitofp <4 x i32> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -436,6 +449,7 @@ define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
 ; CHECK-BF16-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-BF16-NEXT:    fcvtn2 v0.4s, v1.2d
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = uitofp <4 x i64> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -444,6 +458,7 @@ define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
 define void @test_insert_at_zero(bfloat %a, ptr %b) #0 {
 ; CHECK-LABEL: test_insert_at_zero:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
   %1 = insertelement <4 x bfloat> undef, bfloat %a, i64 0

diff --git a/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
index 8eb8423bf75171..c03e2e5321321a 100644
--- a/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll
@@ -336,6 +336,7 @@ define <4 x bfloat> @sitofp_v4i8(<4 x i8> %a) #0 {
 ; CHECK-BF16-NEXT:    sshll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = sitofp <4 x i8> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -563,6 +564,7 @@ define <4 x bfloat> @uitofp_v4i8(<4 x i8> %a) #0 {
 ; CHECK-BF16-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-BF16-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-BF16-NEXT:    bfcvtn v0.4h, v0.4s
+; CHECK-BF16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BF16-NEXT:    ret
   %1 = uitofp <4 x i8> %a to <4 x bfloat>
   ret <4 x bfloat> %1
@@ -775,6 +777,7 @@ define <8 x bfloat> @uitofp_i64(<8 x i64> %a) #0 {
 define void @test_insert_at_zero(bfloat %a, ptr %b) #0 {
 ; CHECK-LABEL: test_insert_at_zero:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
   %1 = insertelement <8 x bfloat> undef, bfloat %a, i64 0

diff --git a/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll
index b0e84fb6742c9c..222d7435ff7420 100644
--- a/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll
@@ -16,6 +16,7 @@ entry:
 define <4 x bfloat> @test_vdup_n_bf16(bfloat %v) nounwind {
 ; CHECK-LABEL: test_vdup_n_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -28,6 +29,7 @@ entry:
 define <8 x bfloat> @test_vdupq_n_bf16(bfloat %v) nounwind {
 ; CHECK-LABEL: test_vdupq_n_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -40,6 +42,7 @@ entry:
 define <4 x bfloat> @test_vdup_lane_bf16(<4 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vdup_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -51,6 +54,7 @@ entry:
 define <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vdupq_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -84,6 +88,8 @@ entry:
 define <8 x bfloat> @test_vcombine_bf16(<4 x bfloat> %low, <4 x bfloat> %high) nounwind {
 ; CHECK-LABEL: test_vcombine_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -96,6 +102,7 @@ define <4 x bfloat> @test_vget_high_bf16(<8 x bfloat> %a) nounwind {
 ; CHECK-LABEL: test_vget_high_bf16:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -106,6 +113,7 @@ entry:
 define <4 x bfloat> @test_vget_low_bf16(<8 x bfloat> %a) nounwind {
 ; CHECK-LABEL: test_vget_low_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -116,6 +124,7 @@ entry:
 define bfloat @test_vget_lane_bf16(<4 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vget_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -138,6 +147,8 @@ entry:
 define <4 x bfloat> @test_vset_lane_bf16(bfloat %a, <4 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vset_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    mov v1.h[1], v0.h[0]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ret
@@ -150,6 +161,7 @@ entry:
 define <8 x bfloat> @test_vsetq_lane_bf16(bfloat %a, <8 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vsetq_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    mov v1.h[7], v0.h[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
@@ -162,6 +174,7 @@ entry:
 define bfloat @test_vduph_lane_bf16(<4 x bfloat> %v) nounwind {
 ; CHECK-LABEL: test_vduph_lane_bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -184,7 +197,10 @@ entry:
 define <4 x bfloat> @test_vcopy_lane_bf16_v1(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopy_lane_bf16_v1:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[1], v1.h[3]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vset_lane = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
@@ -195,7 +211,10 @@ entry:
 define <4 x bfloat> @test_vcopy_lane_bf16_v2(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopy_lane_bf16_v2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vset_lane = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
@@ -206,6 +225,7 @@ entry:
 define <8 x bfloat> @test_vcopyq_lane_bf16_v1(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopyq_lane_bf16_v1:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[0], v1.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -218,6 +238,7 @@ entry:
 define <8 x bfloat> @test_vcopyq_lane_bf16_v2(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopyq_lane_bf16_v2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[6], v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -230,7 +251,9 @@ entry:
 define <4 x bfloat> @test_vcopy_laneq_bf16_v1(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopy_laneq_bf16_v1:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[0], v1.h[7]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vgetq_lane = extractelement <8 x bfloat> %b, i32 7
@@ -242,7 +265,9 @@ entry:
 define <4 x bfloat> @test_vcopy_laneq_bf16_v2(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
 ; CHECK-LABEL: test_vcopy_laneq_bf16_v2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[4]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vgetq_lane = extractelement <8 x bfloat> %b, i32 4

diff --git a/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll b/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
index 9dd505359fcd9c..864ddc2967c185 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
@@ -7,6 +7,7 @@ define <2 x i16> @bitcast_v2i16_v2f16(<2 x half> %x) {
 ; CHECK-LABEL: bitcast_v2i16_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %y = bitcast <2 x half> %x to <2 x i16>
   ret <2 x i16> %y

diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index 3f5edaefdd1fe8..39f2572d9fd354 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -89,6 +89,7 @@ define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.h[3], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = add i32 %a, %b
   %d = bitcast i32 %c to <4 x i8>
@@ -126,6 +127,7 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
 ; CHECK-SD-NEXT:    add w8, w0, w1
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: bitcast_i32_v2i16:
@@ -136,6 +138,7 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = add i32 %a, %b
   %d = bitcast i32 %c to <2 x i16>
@@ -404,6 +407,7 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
 ; CHECK-SD-NEXT:    strh w8, [sp, #14]
 ; CHECK-SD-NEXT:    ldr s0, [sp, #12]
 ; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #16
 ; CHECK-SD-NEXT:    ret
 ;
@@ -420,6 +424,7 @@ define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.h[3], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = add <2 x i16> %a, %b
   %d = bitcast <2 x i16> %c to <4 x i8>
@@ -438,6 +443,7 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
 ; CHECK-SD-NEXT:    ld1 { v0.h }[0], [x8]
 ; CHECK-SD-NEXT:    orr x8, x8, #0x2
 ; CHECK-SD-NEXT:    ld1 { v0.h }[2], [x8]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #16
 ; CHECK-SD-NEXT:    ret
 ;
@@ -450,6 +456,7 @@ define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = add <4 x i8> %a, %b
   %d = bitcast <4 x i8> %c to <2 x i16>
@@ -499,12 +506,12 @@ define <4 x i64> @bitcast_v8i32_v4i64(<8 x i32> %a, <8 x i32> %b){
 ;
 ; CHECK-GI-LABEL: bitcast_v8i32_v4i64:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    add v0.4s, v0.4s, v2.4s
-; CHECK-GI-NEXT:    add v1.4s, v1.4s, v3.4s
-; CHECK-GI-NEXT:    mov x8, v0.d[1]
-; CHECK-GI-NEXT:    mov x9, v1.d[1]
-; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
-; CHECK-GI-NEXT:    mov v1.d[0], v1.d[0]
+; CHECK-GI-NEXT:    add v2.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    add v3.4s, v1.4s, v3.4s
+; CHECK-GI-NEXT:    mov x8, v2.d[1]
+; CHECK-GI-NEXT:    mov x9, v3.d[1]
+; CHECK-GI-NEXT:    mov v0.d[0], v2.d[0]
+; CHECK-GI-NEXT:    mov v1.d[0], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
 ; CHECK-GI-NEXT:    ret
@@ -560,12 +567,12 @@ define <4 x i64> @bitcast_v16i16_v4i64(<16 x i16> %a, <16 x i16> %b){
 ;
 ; CHECK-GI-LABEL: bitcast_v16i16_v4i64:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    add v0.8h, v0.8h, v2.8h
-; CHECK-GI-NEXT:    add v1.8h, v1.8h, v3.8h
-; CHECK-GI-NEXT:    mov x8, v0.d[1]
-; CHECK-GI-NEXT:    mov x9, v1.d[1]
-; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
-; CHECK-GI-NEXT:    mov v1.d[0], v1.d[0]
+; CHECK-GI-NEXT:    add v2.8h, v0.8h, v2.8h
+; CHECK-GI-NEXT:    add v3.8h, v1.8h, v3.8h
+; CHECK-GI-NEXT:    mov x8, v2.d[1]
+; CHECK-GI-NEXT:    mov x9, v3.d[1]
+; CHECK-GI-NEXT:    mov v0.d[0], v2.d[0]
+; CHECK-GI-NEXT:    mov v1.d[0], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
 ; CHECK-GI-NEXT:    ret
@@ -602,18 +609,18 @@ define <8 x i64> @bitcast_v16i32_v8i64(<16 x i32> %a, <16 x i32> %b){
 ;
 ; CHECK-GI-LABEL: bitcast_v16i32_v8i64:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    add v0.4s, v0.4s, v4.4s
-; CHECK-GI-NEXT:    add v1.4s, v1.4s, v5.4s
-; CHECK-GI-NEXT:    add v2.4s, v2.4s, v6.4s
-; CHECK-GI-NEXT:    add v3.4s, v3.4s, v7.4s
-; CHECK-GI-NEXT:    mov x8, v0.d[1]
-; CHECK-GI-NEXT:    mov x9, v1.d[1]
-; CHECK-GI-NEXT:    mov x10, v2.d[1]
-; CHECK-GI-NEXT:    mov x11, v3.d[1]
-; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
-; CHECK-GI-NEXT:    mov v1.d[0], v1.d[0]
-; CHECK-GI-NEXT:    mov v2.d[0], v2.d[0]
-; CHECK-GI-NEXT:    mov v3.d[0], v3.d[0]
+; CHECK-GI-NEXT:    add v4.4s, v0.4s, v4.4s
+; CHECK-GI-NEXT:    add v5.4s, v1.4s, v5.4s
+; CHECK-GI-NEXT:    add v6.4s, v2.4s, v6.4s
+; CHECK-GI-NEXT:    add v7.4s, v3.4s, v7.4s
+; CHECK-GI-NEXT:    mov x8, v4.d[1]
+; CHECK-GI-NEXT:    mov x9, v5.d[1]
+; CHECK-GI-NEXT:    mov x10, v6.d[1]
+; CHECK-GI-NEXT:    mov x11, v7.d[1]
+; CHECK-GI-NEXT:    mov v0.d[0], v4.d[0]
+; CHECK-GI-NEXT:    mov v1.d[0], v5.d[0]
+; CHECK-GI-NEXT:    mov v2.d[0], v6.d[0]
+; CHECK-GI-NEXT:    mov v3.d[0], v7.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
 ; CHECK-GI-NEXT:    mov v2.d[1], x10

diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index 7adfafc28aa9cc..eefb862c5313cf 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -546,6 +546,7 @@ define i32 @test9(i64 %b, i32 %e) {
 ; CHECK-NEXT:    lsr x0, x0, #12
 ; CHECK-NEXT:    lsr w8, w1, #23
 ; CHECK-NEXT:    bfi w0, w8, #23, #9
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %c = lshr i64 %b, 12
   %d = trunc i64 %c to i32

diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll
index 08bbad6ab69b5a..e86f55d63f754b 100644
--- a/llvm/test/CodeGen/AArch64/bswap.ll
+++ b/llvm/test/CodeGen/AArch64/bswap.ll
@@ -25,6 +25,7 @@ declare i16 @llvm.bswap.i16(i16)
 define i64 @bswap_i16_to_i64_anyext(i16 %a) {
 ; CHECK-SD-LABEL: bswap_i16_to_i64_anyext:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    rev16 x8, x0
 ; CHECK-SD-NEXT:    lsl x0, x8, #48
 ; CHECK-SD-NEXT:    ret
@@ -178,6 +179,7 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %a){
 ; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    rev16 v0.8b, v0.8b
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
@@ -208,6 +210,7 @@ define <1 x i32> @bswap_v1i32(<1 x i32> %a){
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    rev w8, w8
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %res = call <1 x i32> @llvm.bswap.v1i32(<1 x i32> %a)

diff --git a/llvm/test/CodeGen/AArch64/build-one-lane.ll b/llvm/test/CodeGen/AArch64/build-one-lane.ll
index 0b71ff2ecfcdf3..ac37fbc349d7d7 100644
--- a/llvm/test/CodeGen/AArch64/build-one-lane.ll
+++ b/llvm/test/CodeGen/AArch64/build-one-lane.ll
@@ -9,6 +9,7 @@ define <8 x i8> @v8i8z(i8 %t, i8 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.b[7], w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef>, i8 %s, i32 7
   ret <8 x i8> %v
@@ -29,6 +30,7 @@ define <4 x i16> @v4i16z(i16 %t, i16 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.h[3], w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, i16 %s, i32 3
   ret <4 x i16> %v
@@ -49,6 +51,7 @@ define <2 x i32> @v2i32z(i32 %t, i32 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    mov v0.s[1], w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <2 x i32> <i32 0, i32 undef>, i32 %s, i32 1
   ret <2 x i32> %v
@@ -78,7 +81,9 @@ define <2 x float> @v2f32z(float %t, float %s) nounwind {
 ; CHECK-LABEL: v2f32z:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi d0, #0000000000000000
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <2 x float> <float 0.0, float undef>, float %s, i32 1
   ret <2 x float> %v
@@ -88,6 +93,7 @@ define <4 x float> @v4f32z(float %t, float %s) nounwind {
 ; CHECK-LABEL: v4f32z:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    ret
   %v = insertelement <4 x float> <float 0.0, float 0.0, float 0.0, float undef>, float %s, i32 3
@@ -98,6 +104,7 @@ define <2 x double> @v2f64z(double %t, double %s) nounwind {
 ; CHECK-LABEL: v2f64z:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %v = insertelement <2 x double> <double 0.0, double undef>, double %s, i32 1
@@ -112,6 +119,7 @@ define <8 x i8> @v8i8m(i8 %t, i8 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    mov v0.b[7], w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 undef>, i8 %s, i32 7
   ret <8 x i8> %v
@@ -132,6 +140,7 @@ define <4 x i16> @v4i16m(i16 %t, i16 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    mov v0.h[3], w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <4 x i16> <i16 -1, i16 -1, i16 -1, i16 undef>, i16 %s, i32 3
   ret <4 x i16> %v
@@ -152,6 +161,7 @@ define <2 x i32> @v2i32m(i32 %t, i32 %s) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v0.2d, #0xffffffffffffffff
 ; CHECK-NEXT:    mov v0.s[1], w1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %v = insertelement <2 x i32> <i32 -1, i32 undef>, i32 %s, i32 1
   ret <2 x i32> %v
@@ -267,6 +277,7 @@ define void @v2f32st(ptr %p, float %s) nounwind {
 ; CHECK-LABEL: v2f32st:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.2s, #64, lsl #24
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-NEXT:    str d1, [x0]
 ; CHECK-NEXT:    ret
@@ -279,6 +290,7 @@ define void @v4f32st(ptr %p, float %s) nounwind {
 ; CHECK-LABEL: v4f32st:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi v1.4s, #192, lsl #24
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
@@ -291,6 +303,7 @@ define void @v2f64st(ptr %p, double %s) nounwind {
 ; CHECK-LABEL: v2f64st:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov v1.2d, #2.00000000
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-NEXT:    str q1, [x0]
 ; CHECK-NEXT:    ret
@@ -307,6 +320,7 @@ define <32 x i8> @test_lanex_32xi8(<32 x i8> %a, i32 %x) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1f
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    mov w10, #30 // =0x1e

diff --git a/llvm/test/CodeGen/AArch64/build-vector-extract.ll b/llvm/test/CodeGen/AArch64/build-vector-extract.ll
index 6bfb9adc8a60d1..36b1b2cdcb4320 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-extract.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-extract.ll
@@ -631,6 +631,7 @@ define <2 x i64> @extract3_i8_zext_insert1_i64_zero(<16 x i8> %x) {
 define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) {
 ; CHECK-LABEL: larger_bv_than_source:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.h[2]
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
index 7f793b7ca550a6..dbbfbea9176f6e 100644
--- a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
+++ b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll
@@ -101,6 +101,7 @@ define <8 x i8> @test6(ptr nocapture noundef readonly %a, ptr nocapture noundef
 ; CHECK-NEXT:    ld1r { v1.8b }, [x1]
 ; CHECK-NEXT:    ld1r { v0.8b }, [x0]
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i8, ptr %a, align 1
@@ -119,6 +120,7 @@ define <8 x i8> @test7(ptr nocapture noundef readonly %a, ptr nocapture noundef
 ; CHECK-NEXT:    ld1r { v1.8b }, [x0]
 ; CHECK-NEXT:    ld1r { v0.8b }, [x1]
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i8, ptr %a, align 1
@@ -191,6 +193,7 @@ define <8 x i8> @test11(ptr nocapture noundef readonly %a, ptr nocapture noundef
 ; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i8, ptr %a, align 1

diff --git a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
index 27666f06e81426..8fbed8bfdb3fd8 100644
--- a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
+++ b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
@@ -143,6 +143,7 @@ B:
 define i64 @f_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: f_i32_sign_extend_i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    csel x8, x1, x2, ge
@@ -166,6 +167,7 @@ B:
 define i64 @g_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: g_i32_sign_extend_i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x8, w0
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    csel x8, x1, x2, lt

diff --git a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
index e87d43161a895e..1cc194e77b94b1 100644
--- a/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-to-cmn.ll
@@ -31,6 +31,7 @@ entry:
 define i1 @test_EQ_IlsEbT(i64 %a, i16 %b) {
 ; CHECK-LABEL: test_EQ_IlsEbT:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, sxth
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -44,6 +45,7 @@ entry:
 define i1 @test_EQ_IlcEbT(i64 %a, i8 %b) {
 ; CHECK-LABEL: test_EQ_IlcEbT:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, uxtb
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -108,6 +110,7 @@ entry:
 define i1 @test_EQ_IslEbT(i16 %a, i64 %b) {
 ; CHECK-LABEL: test_EQ_IslEbT:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, sxth
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -164,6 +167,7 @@ entry:
 define i1 @test_EQ_IclEbT(i8 %a, i64 %b) {
 ; CHECK-LABEL: test_EQ_IclEbT:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, uxtb
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -245,6 +249,7 @@ entry:
 define i1 @test_NE_IlsEbT(i64 %a, i16 %b) {
 ; CHECK-LABEL: test_NE_IlsEbT:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, sxth
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -258,6 +263,7 @@ entry:
 define i1 @test_NE_IlcEbT(i64 %a, i8 %b) {
 ; CHECK-LABEL: test_NE_IlcEbT:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    cmn x0, w1, uxtb
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -322,6 +328,7 @@ entry:
 define i1 @test_NE_IslEbT(i16 %a, i64 %b) {
 ; CHECK-LABEL: test_NE_IslEbT:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, sxth
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -378,6 +385,7 @@ entry:
 define i1 @test_NE_IclEbT(i8 %a, i64 %b) {
 ; CHECK-LABEL: test_NE_IclEbT:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmn x1, w0, uxtb
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index b7817ebe59b9b5..186d191444feb6 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -53,6 +53,7 @@ define i1 @test_return_bool(ptr %value, i8 %oldValue, i8 %newValue) {
 ; CHECK-LABEL: test_return_bool:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    ; kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:  LBB1_1: ; %cmpxchg.start
 ; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ldaxrb w9, [x0]

diff --git a/llvm/test/CodeGen/AArch64/combine-andintoload.ll b/llvm/test/CodeGen/AArch64/combine-andintoload.ll
index 1d016a406d6b4f..f0b8fef848998f 100644
--- a/llvm/test/CodeGen/AArch64/combine-andintoload.ll
+++ b/llvm/test/CodeGen/AArch64/combine-andintoload.ll
@@ -254,12 +254,14 @@ define i64 @load8_and16_sext(ptr %p, i8 %y) {
 ; CHECK-LABEL: load8_and16_sext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    and x0, x1, x8
 ; CHECK-NEXT:    ret
 ;
 ; CHECKBE-LABEL: load8_and16_sext:
 ; CHECKBE:       // %bb.0:
 ; CHECKBE-NEXT:    ldrb w8, [x0]
+; CHECKBE-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECKBE-NEXT:    and x0, x1, x8
 ; CHECKBE-NEXT:    ret
   %x = load i8, ptr %p, align 4

diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
index b62e2f1fa945f2..a5c64c0982d0f8 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
@@ -14,15 +14,29 @@ target triple = "aarch64"
 
 ; Expected to not transform
 define <2 x half> @complex_add_v2f16(<2 x half> %a, <2 x half> %b) {
-; CHECK-LABEL: complex_add_v2f16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov h2, v0.h[1]
-; CHECK-NEXT:    mov h3, v1.h[1]
-; CHECK-NEXT:    fsub h1, h1, h2
-; CHECK-NEXT:    fadd h0, h3, h0
-; CHECK-NEXT:    mov v1.h[1], v0.h[0]
-; CHECK-NEXT:    fmov d0, d1
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: complex_add_v2f16:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    mov h2, v0.h[1]
+; CHECK-SD-NEXT:    mov h3, v1.h[1]
+; CHECK-SD-NEXT:    fsub h1, h1, h2
+; CHECK-SD-NEXT:    fadd h0, h3, h0
+; CHECK-SD-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-SD-NEXT:    fmov d0, d1
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: complex_add_v2f16:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NEXT:    fsub h1, h1, h2
+; CHECK-GI-NEXT:    fadd h0, h3, h0
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    ret
 entry:
   %a.real = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 0>
   %a.imag = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 1>

diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
index 8f84314f07918b..afcdb76067f433 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
@@ -7,12 +7,15 @@ target triple = "aarch64"
 define <2 x half> @complex_mul_v2f16(<2 x half> %a, <2 x half> %b) {
 ; CHECK-LABEL: complex_mul_v2f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h2, v0.h[1]
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul h3, h0, v1.h[1]
 ; CHECK-NEXT:    fmul h2, h2, v1.h[1]
 ; CHECK-NEXT:    fmla h3, h1, v0.h[1]
 ; CHECK-NEXT:    fnmsub h0, h1, h0, h2
 ; CHECK-NEXT:    mov v0.h[1], v3.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %a.real   = shufflevector <2 x half> %a, <2 x half> poison, <1 x i32> <i32 0>

diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
index eae724870fb9dd..039025dafa0d6e 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll
@@ -111,8 +111,8 @@ define <4 x float> @multiple_muls_shuffle_external(<4 x float> %a, <4 x float> %
 ; CHECK-NEXT:    fmul v17.2s, v6.2s, v5.2s
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    fmul v5.2s, v4.2s, v5.2s
-; CHECK-NEXT:    fcmla v0.4s, v2.4s, v3.4s, #0
 ; CHECK-NEXT:    fmla v17.2s, v1.2s, v4.2s
+; CHECK-NEXT:    fcmla v0.4s, v2.4s, v3.4s, #0
 ; CHECK-NEXT:    str d1, [x0]
 ; CHECK-NEXT:    fneg v16.2s, v5.2s
 ; CHECK-NEXT:    fcmla v0.4s, v2.4s, v3.4s, #90
@@ -162,19 +162,19 @@ define <4 x float> @multiple_muls_shuffle_external_with_loads(ptr %ptr_a, ptr %p
 ; CHECK-NEXT:    ld2 { v0.2s, v1.2s }, [x0]
 ; CHECK-NEXT:    ld2 { v2.2s, v3.2s }, [x1]
 ; CHECK-NEXT:    fmul v4.2s, v3.2s, v1.2s
-; CHECK-NEXT:    fmul v1.2s, v2.2s, v1.2s
+; CHECK-NEXT:    fmul v6.2s, v2.2s, v1.2s
 ; CHECK-NEXT:    fneg v4.2s, v4.2s
-; CHECK-NEXT:    fmla v1.2s, v0.2s, v3.2s
+; CHECK-NEXT:    fmla v6.2s, v0.2s, v3.2s
 ; CHECK-NEXT:    fmla v4.2s, v0.2s, v2.2s
 ; CHECK-NEXT:    str d4, [x4]
 ; CHECK-NEXT:    ldr q5, [x2]
-; CHECK-NEXT:    ext v2.16b, v5.16b, v5.16b, #8
-; CHECK-NEXT:    zip1 v0.2s, v5.2s, v2.2s
-; CHECK-NEXT:    zip2 v2.2s, v5.2s, v2.2s
-; CHECK-NEXT:    fmul v3.2s, v0.2s, v1.2s
-; CHECK-NEXT:    fmul v1.2s, v2.2s, v1.2s
-; CHECK-NEXT:    fmla v3.2s, v4.2s, v2.2s
-; CHECK-NEXT:    fneg v2.2s, v1.2s
+; CHECK-NEXT:    ext v7.16b, v5.16b, v5.16b, #8
+; CHECK-NEXT:    zip1 v0.2s, v5.2s, v7.2s
+; CHECK-NEXT:    zip2 v1.2s, v5.2s, v7.2s
+; CHECK-NEXT:    fmul v3.2s, v0.2s, v6.2s
+; CHECK-NEXT:    fmul v6.2s, v1.2s, v6.2s
+; CHECK-NEXT:    fmla v3.2s, v4.2s, v1.2s
+; CHECK-NEXT:    fneg v2.2s, v6.2s
 ; CHECK-NEXT:    fmla v2.2s, v4.2s, v0.2s
 ; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    st2 { v2.2s, v3.2s }, [x5]
@@ -241,20 +241,20 @@ define <4 x float> @multiple_muls_mul_external(<4 x float> %a, <4 x float> %b, <
 ; CHECK-NEXT:    zip1 v3.2s, v3.2s, v17.2s
 ; CHECK-NEXT:    fmul v18.2s, v6.2s, v7.2s
 ; CHECK-NEXT:    fmul v5.2s, v19.2s, v16.2s
-; CHECK-NEXT:    fmul v7.2s, v0.2s, v7.2s
 ; CHECK-NEXT:    fmul v16.2s, v2.2s, v16.2s
+; CHECK-NEXT:    fmul v7.2s, v0.2s, v7.2s
 ; CHECK-NEXT:    fneg v4.2s, v18.2s
 ; CHECK-NEXT:    fmla v5.2s, v3.2s, v2.2s
-; CHECK-NEXT:    fmla v7.2s, v1.2s, v6.2s
 ; CHECK-NEXT:    fneg v2.2s, v16.2s
+; CHECK-NEXT:    fmla v7.2s, v1.2s, v6.2s
 ; CHECK-NEXT:    fmla v4.2s, v1.2s, v0.2s
-; CHECK-NEXT:    fmul v0.2s, v7.2s, v5.2s
 ; CHECK-NEXT:    fmla v2.2s, v3.2s, v19.2s
+; CHECK-NEXT:    fmul v0.2s, v7.2s, v5.2s
 ; CHECK-NEXT:    fmul v17.2s, v4.2s, v5.2s
 ; CHECK-NEXT:    str d4, [x0]
+; CHECK-NEXT:    fmla v17.2s, v2.2s, v7.2s
 ; CHECK-NEXT:    fneg v16.2s, v0.2s
 ; CHECK-NEXT:    zip1 v0.4s, v2.4s, v5.4s
-; CHECK-NEXT:    fmla v17.2s, v2.2s, v7.2s
 ; CHECK-NEXT:    fmla v16.2s, v2.2s, v4.2s
 ; CHECK-NEXT:    st2 { v16.2s, v17.2s }, [x1]
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
index 5e0602789cb72b..dcc11609ca2316 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll
@@ -49,6 +49,8 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z2.d
 ; CHECK-NEXT:    faddv d1, p0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %active.lane.mask.entry = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 100)
@@ -147,6 +149,8 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %
 ; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z2.d
 ; CHECK-NEXT:    faddv d1, p0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.vscale.i64()
@@ -249,6 +253,8 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt
 ; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z2.d
 ; CHECK-NEXT:    faddv d1, p0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %active.lane.mask.entry = tail call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 100)

diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
index 2a51ded6a96761..8e26ef6b87ecca 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll
@@ -42,6 +42,8 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z2.d
 ; CHECK-NEXT:    faddv d1, p0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.vscale.i64()
@@ -129,6 +131,8 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uzp2 z1.d, z1.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z2.d
 ; CHECK-NEXT:    faddv d1, p0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.vscale.i64()
@@ -221,6 +225,8 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) {
 ; CHECK-NEXT:    fadd z2.d, z2.d, z0.d
 ; CHECK-NEXT:    faddv d0, p0, z1.d
 ; CHECK-NEXT:    faddv d1, p0, z2.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.vscale.i64()
@@ -326,12 +332,14 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia
 ; CHECK-NEXT:    add z2.d, z5.d, z2.d
 ; CHECK-NEXT:    b.ne .LBB3_1
 ; CHECK-NEXT:  // %bb.2: // %middle.block
+; CHECK-NEXT:    uaddv d2, p0, z2.d
 ; CHECK-NEXT:    uzp2 z3.d, z1.d, z0.d
 ; CHECK-NEXT:    uzp1 z1.d, z1.d, z0.d
-; CHECK-NEXT:    uaddv d2, p0, z2.d
 ; CHECK-NEXT:    faddv d0, p0, z3.d
-; CHECK-NEXT:    faddv d1, p0, z1.d
 ; CHECK-NEXT:    fmov x8, d2
+; CHECK-NEXT:    faddv d1, p0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z1
 ; CHECK-NEXT:    str w8, [x4]
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
index 0277a98cba7b9e..b4425c0c01e177 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll
@@ -57,6 +57,8 @@ define <vscale x 4 x double> @complex_mul_non_const(<vscale x 4 x double> %a, <v
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov z6.d, #0 // =0x0
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $d5 killed $d5 def $z5
+; CHECK-NEXT:    // kill: def $d4 killed $d4 def $z4
 ; CHECK-NEXT:    mov z5.d, d5
 ; CHECK-NEXT:    mov z4.d, d4
 ; CHECK-NEXT:    mov z24.d, z6.d

diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
index 83178a11499625..ad9240b0922bda 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll
@@ -54,6 +54,8 @@ define <4 x double> @complex_mul_non_const(<4 x double> %a, <4 x double> %b, [2
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v7.2d, #0000000000000000
 ; CHECK-NEXT:    movi v6.2d, #0000000000000000
+; CHECK-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-NEXT:    mov v4.d[1], v5.d[0]
 ; CHECK-NEXT:    movi v5.2d, #0000000000000000
 ; CHECK-NEXT:    fcmla v7.2d, v1.2d, v3.2d, #0

diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
index 5c4782ec986c2c..7686740aec3026 100644
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll
@@ -201,76 +201,84 @@ entry:
 define <12 x float> @abp90c12(<12 x float> %a, <12 x float> %b, <12 x float> %c) {
 ; CHECK-LABEL: abp90c12:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov v1.s[1], v3.s[0]
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-NEXT:    ldr s17, [sp, #40]
-; CHECK-NEXT:    ldr s3, [sp, #32]
 ; CHECK-NEXT:    add x10, sp, #56
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    add x9, sp, #48
+; CHECK-NEXT:    mov v1.s[1], v3.s[0]
+; CHECK-NEXT:    ldr s3, [sp, #32]
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
 ; CHECK-NEXT:    ld1 { v17.s }[1], [x10]
-; CHECK-NEXT:    ld1 { v3.s }[1], [x9]
-; CHECK-NEXT:    add x9, sp, #72
+; CHECK-NEXT:    // kill: def $s5 killed $s5 def $q5
 ; CHECK-NEXT:    ldr s16, [sp, #8]
+; CHECK-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; CHECK-NEXT:    add x10, sp, #24
+; CHECK-NEXT:    ld1 { v3.s }[1], [x9]
+; CHECK-NEXT:    add x9, sp, #72
+; CHECK-NEXT:    // kill: def $s7 killed $s7 def $q7
+; CHECK-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-NEXT:    ldr s2, [sp]
-; CHECK-NEXT:    mov v1.s[2], v5.s[0]
-; CHECK-NEXT:    ldr s5, [sp, #96]
+; CHECK-NEXT:    ld1 { v16.s }[1], [x10]
+; CHECK-NEXT:    add x10, sp, #112
 ; CHECK-NEXT:    ldr s20, [sp, #136]
+; CHECK-NEXT:    mov v1.s[2], v5.s[0]
 ; CHECK-NEXT:    ld1 { v17.s }[2], [x9]
 ; CHECK-NEXT:    add x9, sp, #64
-; CHECK-NEXT:    ld1 { v16.s }[1], [x10]
+; CHECK-NEXT:    ldr s5, [sp, #96]
 ; CHECK-NEXT:    ld1 { v3.s }[2], [x9]
-; CHECK-NEXT:    add x10, sp, #112
 ; CHECK-NEXT:    mov v0.s[2], v4.s[0]
 ; CHECK-NEXT:    add x9, sp, #88
 ; CHECK-NEXT:    ldr s4, [sp, #104]
+; CHECK-NEXT:    ldr s19, [sp, #192]
 ; CHECK-NEXT:    ld1 { v5.s }[1], [x10]
-; CHECK-NEXT:    mov v1.s[3], v7.s[0]
 ; CHECK-NEXT:    add x10, sp, #80
 ; CHECK-NEXT:    ld1 { v17.s }[3], [x9]
+; CHECK-NEXT:    mov v1.s[3], v7.s[0]
 ; CHECK-NEXT:    add x9, sp, #120
 ; CHECK-NEXT:    ld1 { v3.s }[3], [x10]
-; CHECK-NEXT:    ldr s7, [sp, #128]
 ; CHECK-NEXT:    ld1 { v4.s }[1], [x9]
+; CHECK-NEXT:    ldr s7, [sp, #128]
 ; CHECK-NEXT:    add x10, sp, #144
 ; CHECK-NEXT:    mov v0.s[3], v6.s[0]
-; CHECK-NEXT:    ld1 { v7.s }[1], [x10]
 ; CHECK-NEXT:    add x9, sp, #16
-; CHECK-NEXT:    ldr s19, [sp, #192]
-; CHECK-NEXT:    fmul v6.4s, v17.4s, v1.4s
-; CHECK-NEXT:    fmul v1.4s, v3.4s, v1.4s
+; CHECK-NEXT:    ld1 { v7.s }[1], [x10]
 ; CHECK-NEXT:    ld1 { v2.s }[1], [x9]
+; CHECK-NEXT:    add x9, sp, #160
+; CHECK-NEXT:    fmul v6.4s, v17.4s, v1.4s
 ; CHECK-NEXT:    fmul v18.4s, v4.4s, v16.4s
 ; CHECK-NEXT:    fmul v16.4s, v5.4s, v16.4s
-; CHECK-NEXT:    add x9, sp, #160
+; CHECK-NEXT:    fmul v1.4s, v3.4s, v1.4s
+; CHECK-NEXT:    add x10, sp, #208
 ; CHECK-NEXT:    ld1 { v7.s }[2], [x9]
 ; CHECK-NEXT:    add x9, sp, #152
-; CHECK-NEXT:    add x10, sp, #208
+; CHECK-NEXT:    ld1 { v19.s }[1], [x10]
 ; CHECK-NEXT:    ld1 { v20.s }[1], [x9]
 ; CHECK-NEXT:    add x9, sp, #176
-; CHECK-NEXT:    ld1 { v19.s }[1], [x10]
-; CHECK-NEXT:    fneg v6.4s, v6.4s
-; CHECK-NEXT:    fmla v1.4s, v0.4s, v17.4s
 ; CHECK-NEXT:    add x10, sp, #184
+; CHECK-NEXT:    fneg v6.4s, v6.4s
 ; CHECK-NEXT:    fneg v18.4s, v18.4s
 ; CHECK-NEXT:    fmla v16.4s, v2.4s, v4.4s
+; CHECK-NEXT:    fmla v1.4s, v0.4s, v17.4s
 ; CHECK-NEXT:    ld1 { v7.s }[3], [x9]
 ; CHECK-NEXT:    add x9, sp, #168
-; CHECK-NEXT:    ldr s4, [sp, #200]
 ; CHECK-NEXT:    ld1 { v20.s }[2], [x9]
+; CHECK-NEXT:    ldr s4, [sp, #200]
 ; CHECK-NEXT:    add x9, sp, #216
 ; CHECK-NEXT:    fmla v6.4s, v0.4s, v3.4s
-; CHECK-NEXT:    fsub v0.4s, v7.4s, v1.4s
-; CHECK-NEXT:    ld1 { v4.s }[1], [x9]
 ; CHECK-NEXT:    fmla v18.4s, v2.4s, v5.4s
+; CHECK-NEXT:    ld1 { v4.s }[1], [x9]
+; CHECK-NEXT:    fsub v0.4s, v7.4s, v1.4s
 ; CHECK-NEXT:    fsub v1.4s, v19.4s, v16.4s
 ; CHECK-NEXT:    ld1 { v20.s }[3], [x10]
 ; CHECK-NEXT:    fadd v2.4s, v4.4s, v18.4s
 ; CHECK-NEXT:    fadd v3.4s, v20.4s, v6.4s
 ; CHECK-NEXT:    ext v4.16b, v0.16b, v1.16b, #12
 ; CHECK-NEXT:    ext v5.16b, v3.16b, v2.16b, #12
-; CHECK-NEXT:    ext v4.16b, v0.16b, v4.16b, #12
 ; CHECK-NEXT:    trn2 v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    ext v4.16b, v0.16b, v4.16b, #12
 ; CHECK-NEXT:    ext v5.16b, v3.16b, v5.16b, #8
 ; CHECK-NEXT:    rev64 v4.4s, v4.4s
 ; CHECK-NEXT:    trn2 v2.4s, v4.4s, v5.4s

diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index 8570b4d46d9733..0daa6e7f16202a 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -10,12 +10,15 @@ define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) {
 ;
 ; CHECK-GI-LABEL: concat1:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov w9, v1.s[1]
 ; CHECK-GI-NEXT:    mov v0.h[1], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
 ; CHECK-GI-NEXT:    mov v0.h[3], w9
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
    %v4i8 = shufflevector <2 x i8> %A, <2 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x i8> %v4i8
@@ -35,6 +38,7 @@ define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
    %v8i8 = shufflevector <4 x i8> %A, <4 x i8> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    ret <8 x i8> %v8i8
@@ -43,6 +47,8 @@ define <8 x i8> @concat2(<4 x i8> %A, <4 x i8> %B) {
 define <16 x i8> @concat3(<8 x i8> %A, <8 x i8> %B) {
 ; CHECK-LABEL: concat3:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
    %v16i8 = shufflevector <8 x i8> %A, <8 x i8> %B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -63,6 +69,7 @@ define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
    %v4i16 = shufflevector <2 x i16> %A, <2 x i16> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x i16> %v4i16
@@ -71,6 +78,8 @@ define <4 x i16> @concat4(<2 x i16> %A, <2 x i16> %B) {
 define <8 x i16> @concat5(<4 x i16> %A, <4 x i16> %B) {
 ; CHECK-LABEL: concat5:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
    %v8i16 = shufflevector <4 x i16> %A, <4 x i16> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -92,6 +101,8 @@ define <16 x i16> @concat6(ptr %A, ptr %B) {
 define <4 x i32> @concat7(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: concat7:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
    %v4i32 = shufflevector <2 x i32> %A, <2 x i32> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -122,6 +133,7 @@ define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    fmov w8, s1
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
    %v4half= shufflevector <2 x half> %A, <2 x half> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x half> %v4half
@@ -130,6 +142,8 @@ define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {
 define <8 x half> @concat10(<4 x half> %A, <4 x half> %B) {
 ; CHECK-LABEL: concat10:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
    %v8half= shufflevector <4 x half> %A, <4 x half> %B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -154,9 +168,9 @@ define <8 x i16> @concat_v8s16_v2s16(ptr %ptr) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldrh w8, [x0]
 ; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    mov v0.h[1], w9
-; CHECK-GI-NEXT:    mov v0.s[0], v0.s[0]
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    mov v1.h[1], w9
+; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-GI-NEXT:    ret
     %a = load <2 x i16>, ptr %ptr
     %b = shufflevector <2 x i16> %a, <2 x i16> %a, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -195,6 +209,10 @@ define <16 x i8> @concat_v16s8_v4s8_load(ptr %ptrA, ptr %ptrB, ptr %ptrC, ptr %p
 define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, <4 x i8> %D) {
 ; CHECK-SD-LABEL: concat_v16s8_v4s8_reg:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
@@ -224,8 +242,12 @@ define <16 x i8> @concat_v16s8_v4s8_reg(<4 x i8> %A, <4 x i8> %B, <4 x i8> %C, <
 define <8 x i16> @concat_v8s16_v2s16_reg(<2 x i16> %A, <2 x i16> %B, <2 x i16> %C, <2 x i16> %D) {
 ; CHECK-SD-LABEL: concat_v8s16_v2s16_reg:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    adrp x8, .LCPI15_0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI15_0]
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-SD-NEXT:    ret
 ;
@@ -260,9 +282,10 @@ define <4 x i16> @concat_undef_first_use_first(ptr %p1, ptr %p2) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldrh w8, [x0]
 ; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    mov v0.h[1], w9
-; CHECK-GI-NEXT:    mov v0.s[1], v0.s[0]
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    mov v1.h[1], w9
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %l1 = load <2 x i16>, ptr %p1
   %l2 = load <2 x i16>, ptr %p2
@@ -282,9 +305,10 @@ define <4 x i16> @concat_undef_first_use_second(ptr %p1, ptr %p2) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ldrh w8, [x0]
 ; CHECK-GI-NEXT:    ldrh w9, [x0, #2]
-; CHECK-GI-NEXT:    fmov s0, w8
-; CHECK-GI-NEXT:    mov v0.h[1], w9
-; CHECK-GI-NEXT:    mov v0.s[1], v0.s[0]
+; CHECK-GI-NEXT:    fmov s1, w8
+; CHECK-GI-NEXT:    mov v1.h[1], w9
+; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %l1 = load <2 x i16>, ptr %p1
   %l2 = load <2 x i16>, ptr %p2

diff --git a/llvm/test/CodeGen/AArch64/concatbinop.ll b/llvm/test/CodeGen/AArch64/concatbinop.ll
index ae0ded9db894aa..828182d18b38ce 100644
--- a/llvm/test/CodeGen/AArch64/concatbinop.ll
+++ b/llvm/test/CodeGen/AArch64/concatbinop.ll
@@ -5,6 +5,10 @@
 define <8 x i16> @concat_add(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: concat_add:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
@@ -50,6 +54,10 @@ define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
 define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: concat_sub:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
@@ -63,6 +71,10 @@ define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
 define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: concat_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
@@ -76,6 +88,10 @@ define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
 define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: concat_xor:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
@@ -89,6 +105,10 @@ define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
 define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
 ; CHECK-LABEL: concat_fadd:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
@@ -102,6 +122,10 @@ define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x
 define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
 ; CHECK-LABEL: concat_fmul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    fmul v0.8h, v0.8h, v1.8h
@@ -115,6 +139,10 @@ define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x
 define <8 x half> @concat_min(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
 ; CHECK-LABEL: concat_min:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-NEXT:    fminnm v0.8h, v0.8h, v1.8h

diff --git a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
index 85abc8c8566b2e..40684b0f3a256b 100644
--- a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
@@ -76,6 +76,7 @@ entry:
 define bfloat @t7(bfloat %x)  {
 ; CHECK-LABEL: t7:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -89,6 +90,7 @@ define bfloat @t7(bfloat %x)  {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptosi bfloat %x to i32
@@ -99,6 +101,7 @@ entry:
 define bfloat @t8(bfloat %x)  {
 ; CHECK-LABEL: t8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -112,6 +115,7 @@ define bfloat @t8(bfloat %x)  {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui bfloat %x to i32
@@ -194,6 +198,7 @@ entry:
 define bfloat @t7_strict(bfloat %x) #0 {
 ; CHECK-LABEL: t7_strict:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -207,6 +212,7 @@ define bfloat @t7_strict(bfloat %x) #0 {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i32 @llvm.experimental.constrained.fptosi.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0
@@ -217,6 +223,7 @@ entry:
 define bfloat @t8_strict(bfloat %x) #0 {
 ; CHECK-LABEL: t8_strict:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -230,6 +237,7 @@ define bfloat @t8_strict(bfloat %x) #0 {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i32 @llvm.experimental.constrained.fptoui.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0

diff --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
index c32113f7090812..11f19b8c56965b 100644
--- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
+++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll
@@ -6,11 +6,12 @@
 define void @signbits_vXi1(<4 x i16> %a1) {
 ; CHECK-LABEL: signbits_vXi1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-NEXT:    mov w1, wzr
-; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT:    mov w2, wzr
+; CHECK-NEXT:    dup v0.4h, v0.h[0]
+; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    cmle v0.4h, v0.4h, #0
 ; CHECK-NEXT:    umov w0, v0.h[0]

diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 75009e41a511d6..a2ebdd28b16b8f 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -38,6 +38,7 @@ entry:
 define <2 x i8> @duplane0_v2i8(<2 x i8> %b) {
 ; CHECK-LABEL: duplane0_v2i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -132,6 +133,7 @@ define <4 x i8> @dup_v4i8(i8 %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    dup v0.8b, w0
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %b = insertelement <4 x i8> poison, i8 %a, i64 0
@@ -142,6 +144,7 @@ entry:
 define <4 x i8> @duplane0_v4i8(<4 x i8> %b) {
 ; CHECK-SD-LABEL: duplane0_v4i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -150,6 +153,7 @@ define <4 x i8> @duplane0_v4i8(<4 x i8> %b) {
 ; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = shufflevector <4 x i8> %b, <4 x i8> poison, <4 x i32> zeroinitializer
@@ -167,6 +171,7 @@ define <4 x i8> @loaddup_v4i8(ptr %p) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ld1r { v0.8b }, [x0]
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load i8, ptr %p
@@ -189,6 +194,7 @@ entry:
 define <8 x i8> @duplane0_v8i8(<8 x i8> %b) {
 ; CHECK-LABEL: duplane0_v8i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -293,6 +299,7 @@ define <2 x i16> @dup_v2i16(i16 %a) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    dup v0.4h, w0
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %b = insertelement <2 x i16> poison, i16 %a, i64 0
@@ -303,6 +310,7 @@ entry:
 define <2 x i16> @duplane0_v2i16(<2 x i16> %b) {
 ; CHECK-SD-LABEL: duplane0_v2i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-SD-NEXT:    ret
 ;
@@ -311,6 +319,7 @@ define <2 x i16> @duplane0_v2i16(<2 x i16> %b) {
 ; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> zeroinitializer
@@ -328,6 +337,7 @@ define <2 x i16> @loaddup_v2i16(ptr %p) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ld1r { v0.4h }, [x0]
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load i16, ptr %p
@@ -350,6 +360,7 @@ entry:
 define <3 x i16> @duplane0_v3i16(<3 x i16> %b) {
 ; CHECK-LABEL: duplane0_v3i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -383,6 +394,7 @@ entry:
 define <4 x i16> @duplane0_v4i16(<4 x i16> %b) {
 ; CHECK-LABEL: duplane0_v4i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -491,6 +503,7 @@ entry:
 define <2 x i32> @duplane0_v2i32(<2 x i32> %b) {
 ; CHECK-LABEL: duplane0_v2i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -681,9 +694,12 @@ define <3 x i64> @duplane0_v3i64(<3 x i64> %b) {
 ; CHECK-GI-LABEL: duplane0_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    fmov d2, d0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    dup v0.2d, v2.d[0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
@@ -702,7 +718,9 @@ define <3 x i64> @loaddup_v3i64(ptr %p) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
 ; CHECK-GI-NEXT:    ld1r { v2.2d }, [x0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load i64, ptr %p
@@ -885,6 +903,7 @@ entry:
 define <2 x half> @dup_v2half(half %a) {
 ; CHECK-LABEL: dup_v2half:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -896,6 +915,7 @@ entry:
 define <2 x half> @duplane0_v2half(<2 x half> %b) {
 ; CHECK-LABEL: duplane0_v2half:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -918,6 +938,7 @@ entry:
 define <3 x half> @dup_v3half(half %a) {
 ; CHECK-LABEL: dup_v3half:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -929,6 +950,7 @@ entry:
 define <3 x half> @duplane0_v3half(<3 x half> %b) {
 ; CHECK-LABEL: duplane0_v3half:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -951,6 +973,7 @@ entry:
 define <4 x half> @dup_v4half(half %a) {
 ; CHECK-LABEL: dup_v4half:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -962,6 +985,7 @@ entry:
 define <4 x half> @duplane0_v4half(<4 x half> %b) {
 ; CHECK-LABEL: duplane0_v4half:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -984,6 +1008,7 @@ entry:
 define <8 x half> @dup_v8half(half %a) {
 ; CHECK-LABEL: dup_v8half:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1017,12 +1042,14 @@ entry:
 define <16 x half> @dup_v16half(half %a) {
 ; CHECK-SD-LABEL: dup_v16half:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: dup_v16half:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    dup v2.8h, v0.h[0]
 ; CHECK-GI-NEXT:    dup v1.8h, v0.h[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
@@ -1066,6 +1093,7 @@ entry:
 define <2 x bfloat> @dup_v2bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v2bfloat:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1077,6 +1105,7 @@ entry:
 define <2 x bfloat> @duplane0_v2bfloat(<2 x bfloat> %b) {
 ; CHECK-LABEL: duplane0_v2bfloat:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1099,6 +1128,7 @@ entry:
 define <3 x bfloat> @dup_v3bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v3bfloat:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1110,6 +1140,7 @@ entry:
 define <3 x bfloat> @duplane0_v3bfloat(<3 x bfloat> %b) {
 ; CHECK-LABEL: duplane0_v3bfloat:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1132,6 +1163,7 @@ entry:
 define <4 x bfloat> @dup_v4bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v4bfloat:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1143,6 +1175,7 @@ entry:
 define <4 x bfloat> @duplane0_v4bfloat(<4 x bfloat> %b) {
 ; CHECK-LABEL: duplane0_v4bfloat:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1165,6 +1198,7 @@ entry:
 define <8 x bfloat> @dup_v8bfloat(bfloat %a) {
 ; CHECK-LABEL: dup_v8bfloat:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1198,12 +1232,14 @@ entry:
 define <16 x bfloat> @dup_v16bfloat(bfloat %a) {
 ; CHECK-SD-LABEL: dup_v16bfloat:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: dup_v16bfloat:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    dup v2.8h, v0.h[0]
 ; CHECK-GI-NEXT:    dup v1.8h, v0.h[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
@@ -1247,6 +1283,7 @@ entry:
 define <2 x float> @dup_v2float(float %a) {
 ; CHECK-LABEL: dup_v2float:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1258,6 +1295,7 @@ entry:
 define <2 x float> @duplane0_v2float(<2 x float> %b) {
 ; CHECK-LABEL: duplane0_v2float:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1280,6 +1318,7 @@ entry:
 define <3 x float> @dup_v3float(float %a) {
 ; CHECK-LABEL: dup_v3float:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1313,6 +1352,7 @@ entry:
 define <4 x float> @dup_v4float(float %a) {
 ; CHECK-LABEL: dup_v4float:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1346,12 +1386,14 @@ entry:
 define <8 x float> @dup_v8float(float %a) {
 ; CHECK-SD-LABEL: dup_v8float:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: dup_v8float:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    dup v2.4s, v0.s[0]
 ; CHECK-GI-NEXT:    dup v1.4s, v0.s[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
@@ -1395,6 +1437,7 @@ entry:
 define <2 x double> @dup_v2double(double %a) {
 ; CHECK-LABEL: dup_v2double:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1434,8 +1477,10 @@ define <3 x double> @dup_v3double(double %a) {
 ;
 ; CHECK-GI-LABEL: dup_v3double:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    dup v3.2d, v0.d[0]
 ; CHECK-GI-NEXT:    dup v2.2d, v0.d[0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v3.d[1]
 ; CHECK-GI-NEXT:    fmov d0, d3
 ; CHECK-GI-NEXT:    ret
@@ -1455,9 +1500,12 @@ define <3 x double> @duplane0_v3double(<3 x double> %b) {
 ; CHECK-GI-LABEL: duplane0_v3double:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    fmov d2, d0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    dup v0.2d, v2.d[0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer
@@ -1476,7 +1524,9 @@ define <3 x double> @loaddup_v3double(ptr %p) {
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ld1r { v0.2d }, [x0]
 ; CHECK-GI-NEXT:    ld1r { v2.2d }, [x0]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %a = load double, ptr %p
@@ -1488,12 +1538,14 @@ entry:
 define <4 x double> @dup_v4double(double %a) {
 ; CHECK-SD-LABEL: dup_v4double:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-SD-NEXT:    mov v1.16b, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: dup_v4double:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    dup v2.2d, v0.d[0]
 ; CHECK-GI-NEXT:    dup v1.2d, v0.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b

diff --git a/llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll b/llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll
index 6aa375dc5e4d40..420cf93a023d70 100644
--- a/llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll
+++ b/llvm/test/CodeGen/AArch64/duplane-index-patfrags.ll
@@ -100,6 +100,7 @@ define <4 x i64> @sel.v4i32.umull(<4 x i32> %a, <4 x i32> %b, <2 x i32> %c) {
 define <4 x i32> @sel.v4i32.sqdmull(<8 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: sel.v4i32.sqdmull:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    sqdmull v2.4s, v0.4h, v1.h[0]
 ; CHECK-NEXT:    sqdmlal2 v2.4s, v0.8h, v1.h[0]
 ; CHECK-NEXT:    mov v0.16b, v2.16b

diff --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
index 0765377417d5bf..177f2cafcf833a 100644
--- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
+++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
@@ -9,6 +9,7 @@
 define <8 x i8> @i8_off0(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -19,6 +20,7 @@ define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
@@ -29,6 +31,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-SD-LABEL: i8_off8:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: i8_off8:
@@ -44,6 +47,7 @@ define <8 x i8> @i8_off15(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off15:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #15
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
@@ -54,11 +58,13 @@ define <8 x i8> @i8_off22(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-SD-LABEL: i8_off22:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v0.16b, v1.16b, v1.16b, #6
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: i8_off22:
 ; CHECK-GISEL:       // %bb.0: // %entry
 ; CHECK-GISEL-NEXT:    ext v0.16b, v1.16b, v0.16b, #6
+; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
@@ -69,6 +75,7 @@ entry:
 define <4 x i16> @i16_off0(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -79,6 +86,7 @@ define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -89,6 +97,7 @@ define <4 x i16> @i16_off7(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off7:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #14
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
@@ -99,6 +108,7 @@ define <4 x i16> @i16_off8(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off8:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
@@ -109,6 +119,7 @@ entry:
 define <2 x i32> @i32_off0(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 0, i32 1>
@@ -119,6 +130,7 @@ define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 1, i32 2>
@@ -129,6 +141,7 @@ define <2 x i32> @i32_off3(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off3:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 3, i32 4>
@@ -139,6 +152,7 @@ define <2 x i32> @i32_off4(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off4:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 4, i32 5>
@@ -149,6 +163,7 @@ entry:
 define <1 x i64> @i64_off0(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-LABEL: i64_off0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 0>
@@ -159,6 +174,7 @@ define <1 x i64> @i64_off1(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-SD-LABEL: i64_off1:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: i64_off1:
@@ -174,6 +190,7 @@ define <1 x i64> @i64_off2(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-LABEL: i64_off2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 2>
@@ -184,6 +201,7 @@ entry:
 define <8 x i8> @i8_zero_off0(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -194,6 +212,7 @@ define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
@@ -204,6 +223,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
 ; CHECK-SD-LABEL: i8_zero_off8:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: i8_zero_off8:
@@ -220,6 +240,7 @@ define <8 x i8> @i8_zero_off15(<16 x i8> %arg1) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #15
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
@@ -236,6 +257,7 @@ define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
 ; CHECK-GISEL:       // %bb.0: // %entry
 ; CHECK-GISEL-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-GISEL-NEXT:    ext v0.16b, v0.16b, v0.16b, #6
+; CHECK-GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GISEL-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
@@ -246,6 +268,7 @@ entry:
 define <4 x i16> @i16_zero_off0(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -256,6 +279,7 @@ define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -267,6 +291,7 @@ define <4 x i16> @i16_zero_off7(<8 x i16> %arg1) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #14
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
@@ -287,6 +312,7 @@ entry:
 define <2 x i32> @i32_zero_off0(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
@@ -297,6 +323,7 @@ define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 1, i32 2>
@@ -308,6 +335,7 @@ define <2 x i32> @i32_zero_off3(<4 x i32> %arg1) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 3, i32 4>
@@ -328,6 +356,7 @@ entry:
 define <1 x i64> @i64_zero_off0(<2 x i64> %arg1) {
 ; CHECK-LABEL: i64_zero_off0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 0>
@@ -338,6 +367,7 @@ define <1 x i64> @i64_zero_off1(<2 x i64> %arg1) {
 ; CHECK-SD-LABEL: i64_zero_off1:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GISEL-LABEL: i64_zero_off1:

diff --git a/llvm/test/CodeGen/AArch64/extbinopload.ll b/llvm/test/CodeGen/AArch64/extbinopload.ll
index 8f3a42cfd1e12d..72f4d58a425e78 100644
--- a/llvm/test/CodeGen/AArch64/extbinopload.ll
+++ b/llvm/test/CodeGen/AArch64/extbinopload.ll
@@ -6,6 +6,7 @@ define <4 x i16> @normal_load_v4i8(ptr %p) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp s0, s1, [x0]
 ; CHECK-NEXT:    uaddl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l1 = load <4 x i8>, ptr %p
   %q = getelementptr i8, ptr %p, i32 4
@@ -38,6 +39,7 @@ define <4 x i16> @load_v4i8(ptr %p) {
 ; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-NEXT:    shl v0.4h, v0.4h, #3
 ; CHECK-NEXT:    uaddw v0.8h, v0.8h, v1.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l1 = load <4 x i8>, ptr %p
   %q = getelementptr i8, ptr %p, i32 4

diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll
index 3f18e756b5c785..b87157a183835d 100644
--- a/llvm/test/CodeGen/AArch64/extract-bits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-bits.ll
@@ -160,6 +160,8 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; CHECK-LABEL: bextr64_a1_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl x8, x8, x2
 ; CHECK-NEXT:    sub x8, x8, #1
@@ -197,6 +199,8 @@ define i64 @bextr64_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
 ; CHECK-NEXT:    mov w9, #1 // =0x1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x9, x9, x2
 ; CHECK-NEXT:    lsr x8, x8, x1
 ; CHECK-NEXT:    sub x9, x9, #1
@@ -394,6 +398,8 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %
 ; CHECK-LABEL: bextr64_b1_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl x8, x8, x2
 ; CHECK-NEXT:    bic x0, x9, x8
@@ -429,6 +435,8 @@ define i64 @bextr64_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
 ; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x9, x9, x2
 ; CHECK-NEXT:    lsr x8, x8, x1
 ; CHECK-NEXT:    bic x0, x8, x9
@@ -465,6 +473,7 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_b0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl x8, x8, x2
 ; CHECK-NEXT:    bic w0, w9, w8
@@ -483,6 +492,7 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_b1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    bic w0, w9, w8
@@ -502,6 +512,7 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_b2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-1 // =0xffffffff
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsr x9, x0, x1
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    bic w0, w9, w8
@@ -541,6 +552,7 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #32 // =0x20
 ; CHECK-NEXT:    mov w9, #-1 // =0xffffffff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w10, w0, w1
 ; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    lsr w8, w9, w8
@@ -580,6 +592,7 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-NEXT:    mov w9, #32 // =0x20
 ; CHECK-NEXT:    mov w10, #-1 // =0xffffffff
 ; CHECK-NEXT:    sub w9, w9, w2
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    lsr w9, w10, w9
 ; CHECK-NEXT:    and w0, w9, w8
@@ -633,6 +646,7 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #64 // =0x40
 ; CHECK-NEXT:    mov x9, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x10, x0, x1
 ; CHECK-NEXT:    sub w8, w8, w2
 ; CHECK-NEXT:    lsr x8, x9, x8
@@ -672,6 +686,7 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-NEXT:    mov w9, #64 // =0x40
 ; CHECK-NEXT:    mov x10, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    sub w9, w9, w2
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x8, x8, x1
 ; CHECK-NEXT:    lsr x9, x10, x9
 ; CHECK-NEXT:    and x0, x9, x8
@@ -782,6 +797,7 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_d1_indexzext:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w0, w1
 ; CHECK-NEXT:    mov w9, #32 // =0x20
 ; CHECK-NEXT:    sub w9, w9, w2
@@ -818,6 +834,7 @@ define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-LABEL: bextr32_d3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr w8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov w9, #32 // =0x20
 ; CHECK-NEXT:    sub w9, w9, w2
 ; CHECK-NEXT:    lsr w8, w8, w1
@@ -854,6 +871,7 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_d1_indexzext:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr x8, x0, x1
 ; CHECK-NEXT:    mov w9, #64 // =0x40
 ; CHECK-NEXT:    sub w9, w9, w2
@@ -890,6 +908,7 @@ define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n
 ; CHECK-LABEL: bextr64_d3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr x8, [x0]
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov w9, #64 // =0x40
 ; CHECK-NEXT:    sub w9, w9, w2
 ; CHECK-NEXT:    lsr x8, x8, x1
@@ -916,6 +935,7 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind
 ; CHECK-NEXT:    neg x9, x2
 ; CHECK-NEXT:    lsl x8, x8, x9
 ; CHECK-NEXT:    lsr x0, x8, x9
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %shifted = lshr i64 %val, %numskipbits
   %numhighbits = sub i64 64, %numlowbits

diff --git a/llvm/test/CodeGen/AArch64/extract-insert.ll b/llvm/test/CodeGen/AArch64/extract-insert.ll
index 994d6ee1ca6038..8c133d76ce3173 100644
--- a/llvm/test/CodeGen/AArch64/extract-insert.ll
+++ b/llvm/test/CodeGen/AArch64/extract-insert.ll
@@ -6,10 +6,12 @@ define i32 @trunc_i64_to_i32_le(i64 %x) {
 ; BE-LABEL: trunc_i64_to_i32_le:
 ; BE:       // %bb.0:
 ; BE-NEXT:    lsr x0, x0, #32
+; BE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; BE-NEXT:    ret
 ;
 ; LE-LABEL: trunc_i64_to_i32_le:
 ; LE:       // %bb.0:
+; LE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; LE-NEXT:    ret
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
   %bc = bitcast <2 x i64> %ins to <4 x i32>
@@ -20,11 +22,13 @@ define i32 @trunc_i64_to_i32_le(i64 %x) {
 define i32 @trunc_i64_to_i32_be(i64 %x) {
 ; BE-LABEL: trunc_i64_to_i32_be:
 ; BE:       // %bb.0:
+; BE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; BE-NEXT:    ret
 ;
 ; LE-LABEL: trunc_i64_to_i32_be:
 ; LE:       // %bb.0:
 ; LE-NEXT:    lsr x0, x0, #32
+; LE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; LE-NEXT:    ret
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
   %bc = bitcast <2 x i64> %ins to <4 x i32>
@@ -36,10 +40,12 @@ define i16 @trunc_i64_to_i16_le(i64 %x) {
 ; BE-LABEL: trunc_i64_to_i16_le:
 ; BE:       // %bb.0:
 ; BE-NEXT:    lsr x0, x0, #48
+; BE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; BE-NEXT:    ret
 ;
 ; LE-LABEL: trunc_i64_to_i16_le:
 ; LE:       // %bb.0:
+; LE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; LE-NEXT:    ret
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
   %bc = bitcast <2 x i64> %ins to <8 x i16>
@@ -50,11 +56,13 @@ define i16 @trunc_i64_to_i16_le(i64 %x) {
 define i16 @trunc_i64_to_i16_be(i64 %x) {
 ; BE-LABEL: trunc_i64_to_i16_be:
 ; BE:       // %bb.0:
+; BE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; BE-NEXT:    ret
 ;
 ; LE-LABEL: trunc_i64_to_i16_be:
 ; LE:       // %bb.0:
 ; LE-NEXT:    lsr x0, x0, #48
+; LE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; LE-NEXT:    ret
   %ins = insertelement <2 x i64> undef, i64 %x, i32 0
   %bc = bitcast <2 x i64> %ins to <8 x i16>
@@ -97,11 +105,13 @@ define i8 @trunc_i32_to_i8_be(i32 %x) {
 define i8 @trunc_i64_to_i8_be(i64 %x) {
 ; BE-LABEL: trunc_i64_to_i8_be:
 ; BE:       // %bb.0:
+; BE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; BE-NEXT:    ret
 ;
 ; LE-LABEL: trunc_i64_to_i8_be:
 ; LE:       // %bb.0:
 ; LE-NEXT:    lsr x0, x0, #56
+; LE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; LE-NEXT:    ret
   %ins = insertelement <3 x i64> undef, i64 %x, i32 0
   %bc = bitcast <3 x i64> %ins to <24 x i8>

diff --git a/llvm/test/CodeGen/AArch64/extract-lowbits.ll b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
index c1fcc994d29b58..4b8f3e86b5fefe 100644
--- a/llvm/test/CodeGen/AArch64/extract-lowbits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
@@ -130,6 +130,7 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_a1_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    and x0, x8, x0
@@ -161,6 +162,7 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_a3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    ldr x9, [x0]
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    sub x8, x8, #1
@@ -282,6 +284,7 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b1_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    bic x0, x0, x8
 ; CHECK-NEXT:    ret
@@ -311,6 +314,7 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b3_load_indexzext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    ldr x9, [x0]
 ; CHECK-NEXT:    lsl x8, x8, x1
 ; CHECK-NEXT:    bic x0, x9, x8

diff --git a/llvm/test/CodeGen/AArch64/extract-sext-zext.ll b/llvm/test/CodeGen/AArch64/extract-sext-zext.ll
index 82979810a350bf..ecb76d9320a288 100644
--- a/llvm/test/CodeGen/AArch64/extract-sext-zext.ll
+++ b/llvm/test/CodeGen/AArch64/extract-sext-zext.ll
@@ -48,11 +48,13 @@ define i32 @extract_v4i32(<4 x i32> %x, i32 %y) {
 define i32 @extract_v2i32(<2 x i32> %x, i32 %y) {
 ; CHECK-ISEL-LABEL: extract_v2i32:
 ; CHECK-ISEL:       // %bb.0:
+; CHECK-ISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-ISEL-NEXT:    mov w0, v0.s[1]
 ; CHECK-ISEL-NEXT:    ret
 ;
 ; CHECK-GLOBAL-LABEL: extract_v2i32:
 ; CHECK-GLOBAL:       // %bb.0:
+; CHECK-GLOBAL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GLOBAL-NEXT:    mov s0, v0.s[1]
 ; CHECK-GLOBAL-NEXT:    fmov w0, s0
 ; CHECK-GLOBAL-NEXT:    ret
@@ -72,6 +74,7 @@ define i16 @extract_v8i16(<8 x i16> %x, i32 %y) {
 define i16 @extract_v4i16(<4 x i16> %x, i32 %y) {
 ; CHECK-LABEL: extract_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[1]
 ; CHECK-NEXT:    ret
   %ext = extractelement <4 x i16> %x, i32 1
@@ -90,6 +93,7 @@ define i8 @extract_v16i8(<16 x i8> %x, i32 %y) {
 define i8 @extract_v8i8(<8 x i8> %x, i32 %y) {
 ; CHECK-LABEL: extract_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[1]
 ; CHECK-NEXT:    ret
   %ext = extractelement <8 x i8> %x, i32 1
@@ -100,6 +104,7 @@ define i8 @extract_v8i8(<8 x i8> %x, i32 %y) {
 define i64 @sv2i32i64(<2 x i32> %x) {
 ; CHECK-LABEL: sv2i32i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov x0, v0.s[1]
 ; CHECK-NEXT:    ret
   %e = extractelement <2 x i32> %x, i64 1
@@ -120,6 +125,7 @@ define i64 @sv4i32i64(<4 x i32> %x) {
 define i64 @sv4i16i64(<4 x i16> %x) {
 ; CHECK-LABEL: sv4i16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov x0, v0.h[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <4 x i16> %x, i64 2
@@ -140,6 +146,7 @@ define i64 @sv8i16i64(<8 x i16> %x) {
 define i64 @sv8i8i64(<8 x i8> %x) {
 ; CHECK-LABEL: sv8i8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov x0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -170,6 +177,7 @@ define i32 @sv8i16i32(<8 x i16> %x) {
 define i32 @sv4i16i32(<4 x i16> %x) {
 ; CHECK-LABEL: sv4i16i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.h[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <4 x i16> %x, i64 2
@@ -190,6 +198,7 @@ define i32 @sv16i8i32(<16 x i8> %x) {
 define i32 @sv8i8i32(<8 x i8> %x) {
 ; CHECK-LABEL: sv8i8i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -210,6 +219,7 @@ define i16 @sv16i8i16(<16 x i8> %x) {
 define i16 @sv8i8i16(<8 x i8> %x) {
 ; CHECK-LABEL: sv8i8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -222,6 +232,7 @@ define i16 @sv8i8i16(<8 x i8> %x) {
 define i64 @zv2i32i64(<2 x i32> %x) {
 ; CHECK-LABEL: zv2i32i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w0, v0.s[1]
 ; CHECK-NEXT:    ret
   %e = extractelement <2 x i32> %x, i64 1
@@ -242,6 +253,7 @@ define i64 @zv4i32i64(<4 x i32> %x) {
 define i64 @zv4i16i64(<4 x i16> %x) {
 ; CHECK-LABEL: zv4i16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <4 x i16> %x, i64 2
@@ -262,6 +274,7 @@ define i64 @zv8i16i64(<8 x i16> %x) {
 define i64 @zv8i8i64(<8 x i8> %x) {
 ; CHECK-LABEL: zv8i8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -292,6 +305,7 @@ define i32 @zv8i16i32(<8 x i16> %x) {
 define i32 @zv4i16i32(<4 x i16> %x) {
 ; CHECK-LABEL: zv4i16i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <4 x i16> %x, i64 2
@@ -312,6 +326,7 @@ define i32 @zv16i8i32(<16 x i8> %x) {
 define i32 @zv8i8i32(<8 x i8> %x) {
 ; CHECK-LABEL: zv8i8i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -332,6 +347,7 @@ define i16 @zv16i8i16(<16 x i8> %x) {
 define i16 @zv8i8i16(<8 x i8> %x) {
 ; CHECK-LABEL: zv8i8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[2]
 ; CHECK-NEXT:    ret
   %e = extractelement <8 x i8> %x, i64 2
@@ -370,6 +386,7 @@ define i32 @redundant_i16i32(<8 x i16> %x) {
 define i32 @both_i8i32(<8 x i8> %x) {
 ; CHECK-LABEL: both_i8i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.b[2]
 ; CHECK-NEXT:    smov w9, v0.b[2]
 ; CHECK-NEXT:    eor w0, w8, w9
@@ -384,6 +401,7 @@ define i32 @both_i8i32(<8 x i8> %x) {
 define i32 @redundant_i8i32(<8 x i8> %x) {
 ; CHECK-LABEL: redundant_i8i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v0.b[2]
 ; CHECK-NEXT:    eor w0, w8, w8, lsl #24
 ; CHECK-NEXT:    ret
@@ -451,6 +469,7 @@ define i64 @redundant_i16i64(<8 x i16> %x) {
 define i64 @both_i8i64(<8 x i8> %x) {
 ; CHECK-LABEL: both_i8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.b[2]
 ; CHECK-NEXT:    smov x9, v0.b[2]
 ; CHECK-NEXT:    eor x0, x8, x9
@@ -465,6 +484,7 @@ define i64 @both_i8i64(<8 x i8> %x) {
 define i64 @redundant_i8i64(<8 x i8> %x) {
 ; CHECK-LABEL: redundant_i8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov x8, v0.b[2]
 ; CHECK-NEXT:    eor x0, x8, x8, lsl #56
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
index 9e4a3b9272e6e3..75d55773b3681e 100644
--- a/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
+++ b/llvm/test/CodeGen/AArch64/extract-subvec-combine.ll
@@ -6,6 +6,7 @@ define <2 x i32> @and_extract_zext_idx0(<4 x i16> %vec) nounwind {
 ; CHECK-SD-LABEL: and_extract_zext_idx0:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: and_extract_zext_idx0:
@@ -24,6 +25,7 @@ define <4 x i16> @and_extract_sext_idx0(<8 x i8> %vec) nounwind {
 ; CHECK-SD-LABEL: and_extract_sext_idx0:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: and_extract_sext_idx0:
@@ -43,6 +45,7 @@ define <2 x i32> @and_extract_zext_idx2(<4 x i16> %vec) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: and_extract_zext_idx2:
@@ -63,6 +66,7 @@ define <4 x i16> @and_extract_sext_idx4(<8 x i8> %vec) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    ushll v0.8h, v0.8b, #0
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: and_extract_sext_idx4:
@@ -82,6 +86,7 @@ define <2 x i32> @sext_extract_zext_idx0(<4 x i16> %vec) nounwind {
 ; CHECK-SD-LABEL: sext_extract_zext_idx0:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_extract_zext_idx0:
@@ -116,6 +121,7 @@ define <4 x i16> @sext_extract_sext_idx0(<8 x i8> %vec) nounwind {
 ; CHECK-SD-LABEL: sext_extract_sext_idx0:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_extract_sext_idx0:
@@ -136,6 +142,7 @@ define <2 x i32> @sext_extract_zext_idx2(<4 x i16> %vec) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sshll v0.4s, v0.4h, #0
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_extract_zext_idx2:
@@ -157,6 +164,7 @@ define <4 x i16> @sext_extract_sext_idx4(<8 x i8> %vec) nounwind {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sshll v0.8h, v0.8b, #0
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_extract_sext_idx4:
@@ -176,6 +184,7 @@ define <4 x i16> @sext_extract_sext_idx4(<8 x i8> %vec) nounwind {
 define <8 x i8> @sext_extract_idx(<16 x i8> %vec) nounwind {
 ; CHECK-LABEL: sext_extract_idx:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %extract = call <8 x i8> @llvm.vector.extract.v8i8.v16i8(<16 x i8> %vec, i64 0)
   ret <8 x i8> %extract

diff --git a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
index eeae76f6b0f6e7..12bd2db2297d77 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
@@ -188,6 +188,7 @@ define i1 @extract_icmp_v4i32_splat_rhs_unknown_idx(<4 x i32> %a, i32 %c) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    movi v1.4s, #127
 ; CHECK-NEXT:    add x8, sp, #8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    xtn v0.4h, v0.4s

diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index 5adc225168198c..5e5fdd6d317057 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -39,6 +39,7 @@ define i64 @extract_v2i64_opaque(<2 x i64> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    ldr x0, [x8]
@@ -76,6 +77,7 @@ define i64 @extract_v2i64_freeze(<2 x i64> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    ldr x0, [x8]
@@ -142,6 +144,7 @@ define i64 @extract_v2i64_extract_build_vector_opaque(<2 x i64> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
@@ -176,6 +179,7 @@ define i64 @extract_v2i32_zext(<2 x i32> %a, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr x0, [x8]
@@ -207,6 +211,7 @@ define i64 @extract_v2double_fptosi(<2 x double> %a, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr x0, [x8]
@@ -238,6 +243,7 @@ define double @extract_v2double_fneg(<2 x double> %a, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    fneg v0.2d, v0.2d
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr d0, [x8]
@@ -268,6 +274,7 @@ define i32 @extract_v4i32_add(<4 x i32> %a, <4 x i32> %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI12_0
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -304,6 +311,7 @@ define float @extract_v4i32_minimum(<4 x float> %a, <4 x float> %b, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    fmin v0.4s, v0.4s, v1.4s
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr s0, [x8]
@@ -334,6 +342,7 @@ define float @extract_v4i32_minimum_build_vector(<4 x float> %a, <4 x float> %b,
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI14_0
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -384,6 +393,7 @@ define float @extract_v4i32_copysign_build_vector(<4 x float> %a, <4 x float> %b
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI16_0
 ; CHECK-SD-NEXT:    mvni v1.4s, #128, lsl #24
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -442,6 +452,7 @@ define i32 @extract_v4i32_icmp(<4 x i32> %a, <4 x i32> %b, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI18_0
 ; CHECK-SD-NEXT:    movi v2.4s, #1
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI18_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -511,6 +522,7 @@ define i32 @extract_v4float_fcmp(<4 x float> %a, <4 x float> %b, i32 %c) {
 ; CHECK-SD-NEXT:    movi v1.4s, #1
 ; CHECK-SD-NEXT:    fcmeq v0.4s, v0.4s, v0.4s
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    bic v0.16b, v1.16b, v0.16b
 ; CHECK-SD-NEXT:    str q0, [sp]
@@ -576,6 +588,7 @@ define i32 @extract_v4i32_select(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %c
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    ushll v1.4s, v2.4h, #0
 ; CHECK-SD-NEXT:    adrp x8, .LCPI22_0
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI22_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -645,6 +658,7 @@ define i32 @extract_v4i32_abs(<4 x float> %a, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    frintp v0.4s, v0.4s
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    frintm v0.4s, v0.4s
 ; CHECK-SD-NEXT:    fabs v0.4s, v0.4s
@@ -715,6 +729,7 @@ define i32 @extract_v4i32_abs_half_const(<4 x float> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    adrp x8, .LCPI26_0
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI26_0]
 ; CHECK-SD-NEXT:    mov x8, sp
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
@@ -757,7 +772,9 @@ define i32 @extract_v4i32_vector_insert(<4 x i32> %a, <2 x i32> %b, i32 %c) {
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-NEXT:    str q1, [sp]
@@ -773,6 +790,7 @@ entry:
 define i32 @extract_v4i32_vector_insert_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
 ; CHECK-LABEL: extract_v4i32_vector_insert_const:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov w0, v1.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -787,6 +805,7 @@ define i32 @extract_v4i32_vector_extract(<4 x i32> %a, <2 x i32> %b, i32 %c) {
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-NEXT:    ldr w0, [x8]
@@ -812,6 +831,7 @@ entry:
 define i32 @extract_v4i32_load(<4 x i32> %a, <2 x i32> %b, i32 %c, ptr %arg) {
 ; CHECK-SD-LABEL: extract_v4i32_load:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    ldr w0, [x1, x8, lsl #2]
 ; CHECK-SD-NEXT:    ret
@@ -845,6 +865,7 @@ define double @extract_v4i32_bitcast(<4 x i32> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    ldr d0, [x8]
@@ -871,6 +892,7 @@ entry:
 define double @extract_v4i32_bitcast_const(<4 x i32> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4i32_bitcast_const:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vector = bitcast <4 x i32> %a to <2 x double>
@@ -885,6 +907,7 @@ define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    uzp1 v1.4s, v0.4s, v1.4s
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp]
@@ -897,8 +920,10 @@ define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) {
 ; CHECK-GI-NEXT:    sub sp, sp, #16
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-GI-NEXT:    adrp x8, .LCPI35_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    mov x9, sp
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    mov w8, w0
 ; CHECK-GI-NEXT:    and x8, x8, #0x3
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
@@ -930,6 +955,7 @@ define i32 @extract_v4i32_splat(<4 x i32> %a, <2 x i32> %b, i32 %c) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    movi v0.4s, #11
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr w0, [x8]
@@ -970,6 +996,7 @@ define i32 @extract_v4i32_vp_add(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %m
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add v0.4s, v0.4s, v1.4s
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    ldr w0, [x8]

diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index e091b2a0a3ba6d..5460a376931a55 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -211,8 +211,11 @@ define half @test_tailcall_flipped(half %a, half %b) #0 {
 define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
 ; CHECK-CVT-SD-LABEL: test_select:
 ; CHECK-CVT-SD:       // %bb.0:
+; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-SD-NEXT:    cmp w0, #0
+; CHECK-CVT-SD-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-SD-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: test_select:
@@ -223,20 +226,26 @@ define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
 ;
 ; CHECK-CVT-GI-LABEL: test_select:
 ; CHECK-CVT-GI:       // %bb.0:
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-CVT-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-GI-NEXT:    fmov w8, s0
 ; CHECK-CVT-GI-NEXT:    fmov w9, s1
 ; CHECK-CVT-GI-NEXT:    tst w0, #0x1
 ; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-CVT-GI-NEXT:    fmov s0, w8
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_select:
 ; CHECK-FP16-GI:       // %bb.0:
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-FP16-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-FP16-GI-NEXT:    fmov w8, s0
 ; CHECK-FP16-GI-NEXT:    fmov w9, s1
 ; CHECK-FP16-GI-NEXT:    tst w0, #0x1
 ; CHECK-FP16-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-FP16-GI-NEXT:    fmov s0, w8
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-FP16-GI-NEXT:    ret
   %r = select i1 %c, half %a, half %b
   ret half %r
@@ -247,8 +256,11 @@ define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
 ; CHECK-CVT-SD:       // %bb.0:
 ; CHECK-CVT-SD-NEXT:    fcvt s3, h3
 ; CHECK-CVT-SD-NEXT:    fcvt s2, h2
+; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-CVT-SD-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-SD-NEXT:    fcmp s2, s3
 ; CHECK-CVT-SD-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: test_select_cc:
@@ -261,20 +273,26 @@ define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    fcvt s2, h2
 ; CHECK-CVT-GI-NEXT:    fcvt s3, h3
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-CVT-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-GI-NEXT:    fmov w8, s0
 ; CHECK-CVT-GI-NEXT:    fmov w9, s1
 ; CHECK-CVT-GI-NEXT:    fcmp s2, s3
 ; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-CVT-GI-NEXT:    fmov s0, w8
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_select_cc:
 ; CHECK-FP16-GI:       // %bb.0:
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-FP16-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-FP16-GI-NEXT:    fcmp h2, h3
 ; CHECK-FP16-GI-NEXT:    fmov w8, s0
 ; CHECK-FP16-GI-NEXT:    fmov w9, s1
 ; CHECK-FP16-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-FP16-GI-NEXT:    fmov s0, w8
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-FP16-GI-NEXT:    ret
   %cc = fcmp une half %c, %d
   %r = select i1 %cc, half %a, half %b
@@ -312,7 +330,10 @@ define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
 ; CHECK-CVT-SD-LABEL: test_select_cc_f16_f32:
 ; CHECK-CVT-SD:       // %bb.0:
 ; CHECK-CVT-SD-NEXT:    fcmp s2, s3
+; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-CVT-SD-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-SD-NEXT:    fcsel s0, s0, s1, ne
+; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: test_select_cc_f16_f32:
@@ -323,20 +344,26 @@ define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
 ;
 ; CHECK-CVT-GI-LABEL: test_select_cc_f16_f32:
 ; CHECK-CVT-GI:       // %bb.0:
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-CVT-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-CVT-GI-NEXT:    fcmp s2, s3
 ; CHECK-CVT-GI-NEXT:    fmov w8, s0
 ; CHECK-CVT-GI-NEXT:    fmov w9, s1
 ; CHECK-CVT-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-CVT-GI-NEXT:    fmov s0, w8
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_select_cc_f16_f32:
 ; CHECK-FP16-GI:       // %bb.0:
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
+; CHECK-FP16-GI-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-FP16-GI-NEXT:    fcmp s2, s3
 ; CHECK-FP16-GI-NEXT:    fmov w8, s0
 ; CHECK-FP16-GI-NEXT:    fmov w9, s1
 ; CHECK-FP16-GI-NEXT:    csel w8, w8, w9, ne
 ; CHECK-FP16-GI-NEXT:    fmov s0, w8
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-FP16-GI-NEXT:    ret
   %cc = fcmp une float %c, %d
   %r = select i1 %cc, half %a, half %b
@@ -734,6 +761,7 @@ define i1 @test_fcmp_ord(half %a, half %b) #0 {
 define void @test_fccmp(half %in, ptr %out) {
 ; CHECK-CVT-SD-LABEL: test_fccmp:
 ; CHECK-CVT-SD:       // %bb.0:
+; CHECK-CVT-SD-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-SD-NEXT:    fcvt s1, h0
 ; CHECK-CVT-SD-NEXT:    fmov s2, #5.00000000
 ; CHECK-CVT-SD-NEXT:    adrp x8, .LCPI29_0
@@ -759,6 +787,7 @@ define void @test_fccmp(half %in, ptr %out) {
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    mov w8, #17664 // =0x4500
 ; CHECK-CVT-GI-NEXT:    mov w9, #18432 // =0x4800
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-GI-NEXT:    fcvt s2, h0
 ; CHECK-CVT-GI-NEXT:    fmov s1, w8
 ; CHECK-CVT-GI-NEXT:    fmov s3, w9
@@ -774,6 +803,7 @@ define void @test_fccmp(half %in, ptr %out) {
 ; CHECK-FP16-GI-LABEL: test_fccmp:
 ; CHECK-FP16-GI:       // %bb.0:
 ; CHECK-FP16-GI-NEXT:    fmov h1, #5.00000000
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-FP16-GI-NEXT:    fmov h2, #8.00000000
 ; CHECK-FP16-GI-NEXT:    fmov w8, s0
 ; CHECK-FP16-GI-NEXT:    fcmp h0, h1
@@ -1059,6 +1089,7 @@ define double @test_fpext_double(half %a) #0 {
 define i16 @test_bitcast_halftoi16(half %a) #0 {
 ; CHECK-LABEL: test_bitcast_halftoi16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %r = bitcast half %a to i16
@@ -1069,6 +1100,7 @@ define half @test_bitcast_i16tohalf(i16 %a) #0 {
 ; CHECK-LABEL: test_bitcast_i16tohalf:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %r = bitcast i16 %a to half
   ret half %r
@@ -1368,9 +1400,11 @@ define half @test_fma(half %a, half %b, half %c) #0 {
 define half @test_fabs(half %a) #0 {
 ; CHECK-CVT-LABEL: test_fabs:
 ; CHECK-CVT:       // %bb.0:
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-CVT-NEXT:    fmov w8, s0
 ; CHECK-CVT-NEXT:    and w8, w8, #0x7fff
 ; CHECK-CVT-NEXT:    fmov s0, w8
+; CHECK-CVT-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: test_fabs:
@@ -1444,19 +1478,28 @@ define half @test_copysign(half %a, half %b) #0 {
 ; CHECK-FP16-SD-LABEL: test_copysign:
 ; CHECK-FP16-SD:       // %bb.0:
 ; CHECK-FP16-SD-NEXT:    mvni v2.8h, #128, lsl #8
+; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP16-SD-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-FP16-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-FP16-SD-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_copysign:
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    mvni v2.4h, #128, lsl #8
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
+; CHECK-CVT-GI-NEXT:    // kill: def $h1 killed $h1 def $d1
 ; CHECK-CVT-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_copysign:
 ; CHECK-FP16-GI:       // %bb.0:
 ; CHECK-FP16-GI-NEXT:    mvni v2.4h, #128, lsl #8
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
+; CHECK-FP16-GI-NEXT:    // kill: def $h1 killed $h1 def $d1
 ; CHECK-FP16-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-FP16-GI-NEXT:    ret
   %r = call half @llvm.copysign.f16(half %a, half %b)
   ret half %r
@@ -1467,6 +1510,7 @@ define half @test_copysign_f32(half %a, float %b) #0 {
 ; CHECK-CVT-SD:       // %bb.0:
 ; CHECK-CVT-SD-NEXT:    fcvt s0, h0
 ; CHECK-CVT-SD-NEXT:    mvni v2.4s, #128, lsl #24
+; CHECK-CVT-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-CVT-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; CHECK-CVT-SD-NEXT:    fcvt h0, s0
 ; CHECK-CVT-SD-NEXT:    ret
@@ -1475,21 +1519,27 @@ define half @test_copysign_f32(half %a, float %b) #0 {
 ; CHECK-FP16-SD:       // %bb.0:
 ; CHECK-FP16-SD-NEXT:    fcvt h1, s1
 ; CHECK-FP16-SD-NEXT:    mvni v2.8h, #128, lsl #8
+; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-FP16-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-FP16-SD-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_copysign_f32:
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    fcvt h1, s1
 ; CHECK-CVT-GI-NEXT:    mvni v2.4h, #128, lsl #8
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-CVT-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_copysign_f32:
 ; CHECK-FP16-GI:       // %bb.0:
 ; CHECK-FP16-GI-NEXT:    fcvt h1, s1
 ; CHECK-FP16-GI-NEXT:    mvni v2.4h, #128, lsl #8
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-FP16-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-FP16-GI-NEXT:    ret
   %tb = fptrunc float %b to half
   %r = call half @llvm.copysign.f16(half %a, half %tb)
@@ -1510,21 +1560,27 @@ define half @test_copysign_f64(half %a, double %b) #0 {
 ; CHECK-FP16-SD:       // %bb.0:
 ; CHECK-FP16-SD-NEXT:    fcvt h1, d1
 ; CHECK-FP16-SD-NEXT:    mvni v2.8h, #128, lsl #8
+; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-FP16-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-FP16-SD-NEXT:    ret
 ;
 ; CHECK-CVT-GI-LABEL: test_copysign_f64:
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    fcvt h1, d1
 ; CHECK-CVT-GI-NEXT:    mvni v2.4h, #128, lsl #8
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-CVT-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-CVT-GI-NEXT:    ret
 ;
 ; CHECK-FP16-GI-LABEL: test_copysign_f64:
 ; CHECK-FP16-GI:       // %bb.0:
 ; CHECK-FP16-GI-NEXT:    fcvt h1, d1
 ; CHECK-FP16-GI-NEXT:    mvni v2.4h, #128, lsl #8
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
 ; CHECK-FP16-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-FP16-GI-NEXT:    ret
   %tb = fptrunc double %b to half
   %r = call half @llvm.copysign.f16(half %a, half %tb)
@@ -1541,11 +1597,14 @@ define float @test_copysign_extended(half %a, half %b) #0 {
 ; CHECK-CVT-SD-NEXT:    fcvt s0, h0
 ; CHECK-CVT-SD-NEXT:    mvni v2.4s, #128, lsl #24
 ; CHECK-CVT-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-CVT-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-CVT-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: test_copysign_extended:
 ; CHECK-FP16-SD:       // %bb.0:
 ; CHECK-FP16-SD-NEXT:    mvni v2.8h, #128, lsl #8
+; CHECK-FP16-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP16-SD-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-FP16-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
 ; CHECK-FP16-SD-NEXT:    fcvt s0, h0
 ; CHECK-FP16-SD-NEXT:    ret
@@ -1553,6 +1612,8 @@ define float @test_copysign_extended(half %a, half %b) #0 {
 ; CHECK-CVT-GI-LABEL: test_copysign_extended:
 ; CHECK-CVT-GI:       // %bb.0:
 ; CHECK-CVT-GI-NEXT:    mvni v2.4h, #128, lsl #8
+; CHECK-CVT-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
+; CHECK-CVT-GI-NEXT:    // kill: def $h1 killed $h1 def $d1
 ; CHECK-CVT-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-CVT-GI-NEXT:    fcvt s0, h0
 ; CHECK-CVT-GI-NEXT:    ret
@@ -1560,6 +1621,8 @@ define float @test_copysign_extended(half %a, half %b) #0 {
 ; CHECK-FP16-GI-LABEL: test_copysign_extended:
 ; CHECK-FP16-GI:       // %bb.0:
 ; CHECK-FP16-GI-NEXT:    mvni v2.4h, #128, lsl #8
+; CHECK-FP16-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
+; CHECK-FP16-GI-NEXT:    // kill: def $h1 killed $h1 def $d1
 ; CHECK-FP16-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
 ; CHECK-FP16-GI-NEXT:    fcvt s0, h0
 ; CHECK-FP16-GI-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
index 17b75f89b32dab..903aa8adf70851 100644
--- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll
@@ -144,7 +144,7 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) {
 ; CHECK-GI-LABEL: fabs_v4f128:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov x8, v0.d[1]
-; CHECK-GI-NEXT:    mov v0.d[0], v0.d[0]
+; CHECK-GI-NEXT:    mov v7.d[0], v0.d[0]
 ; CHECK-GI-NEXT:    mov x9, v1.d[1]
 ; CHECK-GI-NEXT:    mov x10, v2.d[1]
 ; CHECK-GI-NEXT:    mov x11, v3.d[1]
@@ -152,13 +152,14 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v2.d[0], v2.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[0], v3.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x7fffffffffffffff
-; CHECK-GI-NEXT:    mov v0.d[1], x8
+; CHECK-GI-NEXT:    mov v7.d[1], x8
 ; CHECK-GI-NEXT:    and x8, x9, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    and x9, x10, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    and x10, x11, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    mov v1.d[1], x8
 ; CHECK-GI-NEXT:    mov v2.d[1], x9
 ; CHECK-GI-NEXT:    mov v3.d[1], x10
+; CHECK-GI-NEXT:    mov v0.16b, v7.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <4 x fp128> @llvm.fabs.v4f128(<4 x fp128> %a)

diff --git a/llvm/test/CodeGen/AArch64/fabs.ll b/llvm/test/CodeGen/AArch64/fabs.ll
index 90b701675ad4f2..43e90070736345 100644
--- a/llvm/test/CodeGen/AArch64/fabs.ll
+++ b/llvm/test/CodeGen/AArch64/fabs.ll
@@ -27,9 +27,11 @@ entry:
 define half @fabs_f16(half %a) {
 ; CHECK-SD-NOFP16-LABEL: fabs_f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-SD-NOFP16-NEXT:    fmov w8, s0
 ; CHECK-SD-NOFP16-NEXT:    and w8, w8, #0x7fff
 ; CHECK-SD-NOFP16-NEXT:    fmov s0, w8
+; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fabs_f16:
@@ -39,9 +41,11 @@ define half @fabs_f16(half %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-GI-NOFP16-NEXT:    fmov w8, s0
 ; CHECK-GI-NOFP16-NEXT:    and w8, w8, #0x7fff
 ; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -66,18 +70,27 @@ entry:
 define <3 x double> @fabs_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: fabs_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fabs v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fabs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fabs_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    fabs d2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fabs v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.fabs.v3f64(<3 x double> %a)

diff --git a/llvm/test/CodeGen/AArch64/faddp-half.ll b/llvm/test/CodeGen/AArch64/faddp-half.ll
index 88676db854f4a3..447476e76ec64a 100644
--- a/llvm/test/CodeGen/AArch64/faddp-half.ll
+++ b/llvm/test/CodeGen/AArch64/faddp-half.ll
@@ -5,16 +5,19 @@
 define half @faddp_2xhalf(<2 x half> %a) {
 ; CHECK-LABEL: faddp_2xhalf:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp h0, v0.2h
 ; CHECK-NEXT:    ret
 ;
 ; CHECKNOFP16-LABEL: faddp_2xhalf:
 ; CHECKNOFP16:       // %bb.0: // %entry
+; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
 ; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
 ; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
@@ -26,16 +29,19 @@ entry:
 define half @faddp_2xhalf_commute(<2 x half> %a) {
 ; CHECK-LABEL: faddp_2xhalf_commute:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp h0, v0.2h
 ; CHECK-NEXT:    ret
 ;
 ; CHECKNOFP16-LABEL: faddp_2xhalf_commute:
 ; CHECKNOFP16:       // %bb.0: // %entry
+; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
 ; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
 ; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 undef>
@@ -47,16 +53,19 @@ entry:
 define half @faddp_4xhalf(<4 x half> %a) {
 ; CHECK-LABEL: faddp_4xhalf:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp h0, v0.2h
 ; CHECK-NEXT:    ret
 ;
 ; CHECKNOFP16-LABEL: faddp_4xhalf:
 ; CHECKNOFP16:       // %bb.0: // %entry
+; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
 ; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
 ; CHECKNOFP16-NEXT:    fadd v0.4s, v0.4s, v1.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -68,16 +77,19 @@ entry:
 define half @faddp_4xhalf_commute(<4 x half> %a) {
 ; CHECK-LABEL: faddp_4xhalf_commute:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp h0, v0.2h
 ; CHECK-NEXT:    ret
 ;
 ; CHECKNOFP16-LABEL: faddp_4xhalf_commute:
 ; CHECKNOFP16:       // %bb.0: // %entry
+; CHECKNOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECKNOFP16-NEXT:    dup v1.4h, v0.h[1]
 ; CHECKNOFP16-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECKNOFP16-NEXT:    fcvtl v1.4s, v1.4h
 ; CHECKNOFP16-NEXT:    fadd v0.4s, v1.4s, v0.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v0.4s
+; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
@@ -103,6 +115,7 @@ define half @faddp_8xhalf(<8 x half> %a) {
 ; CHECKNOFP16-NEXT:    fadd v1.4s, v0.4s, v1.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
 ; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
+; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -128,6 +141,7 @@ define half @faddp_8xhalf_commute(<8 x half> %a) {
 ; CHECKNOFP16-NEXT:    fadd v1.4s, v1.4s, v0.4s
 ; CHECKNOFP16-NEXT:    fcvtn v0.4h, v2.4s
 ; CHECKNOFP16-NEXT:    fcvtn2 v0.8h, v1.4s
+; CHECKNOFP16-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECKNOFP16-NEXT:    ret
 entry:
   %shift = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>

diff --git a/llvm/test/CodeGen/AArch64/faddp.ll b/llvm/test/CodeGen/AArch64/faddp.ll
index f86b3994c0f908..d0c8f6e26d4d6e 100644
--- a/llvm/test/CodeGen/AArch64/faddp.ll
+++ b/llvm/test/CodeGen/AArch64/faddp.ll
@@ -4,6 +4,7 @@
 define float @faddp_2xfloat(<2 x float> %a) {
 ; CHECK-LABEL: faddp_2xfloat:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
 entry:
@@ -40,6 +41,7 @@ entry:
 define float @faddp_2xfloat_commute(<2 x float> %a) {
 ; CHECK-LABEL: faddp_2xfloat_commute:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
 entry:
@@ -102,6 +104,7 @@ entry:
 define float @faddp_2xfloat_strict(<2 x float> %a) #0 {
 ; CHECK-LABEL: faddp_2xfloat_strict:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
 entry:
@@ -138,6 +141,7 @@ entry:
 define float @faddp_2xfloat_commute_strict(<2 x float> %a) #0 {
 ; CHECK-LABEL: faddp_2xfloat_commute_strict:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    faddp s0, v0.2s
 ; CHECK-NEXT:    ret
 entry:
@@ -274,6 +278,7 @@ define <4 x half> @faddp_v8f16(<8 x half> %a, <8 x half> %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    faddp v0.8h, v0.8h, v0.8h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %1 = fadd <8 x half> %a, %b
   %2 = shufflevector <8 x half> %1, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

diff --git a/llvm/test/CodeGen/AArch64/faddsub.ll b/llvm/test/CodeGen/AArch64/faddsub.ll
index cf7eda1d3e6b76..b15579199a0598 100644
--- a/llvm/test/CodeGen/AArch64/faddsub.ll
+++ b/llvm/test/CodeGen/AArch64/faddsub.ll
@@ -68,20 +68,34 @@ entry:
 define <3 x double> @fadd_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: fadd_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fadd v2.2d, v2.2d, v5.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fadd v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fadd_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    fadd d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    fadd d2, d2, d5
 ; CHECK-GI-NEXT:    fadd v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fadd <3 x double> %a, %b
@@ -395,20 +409,34 @@ entry:
 define <3 x double> @fsub_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: fsub_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fsub v2.2d, v2.2d, v5.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fsub v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fsub_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    fsub d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    fsub d2, d2, d5
 ; CHECK-GI-NEXT:    fsub v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fsub <3 x double> %a, %b

diff --git a/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
index 182cdee6459dc6..5e14e63a23d3ca 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -734,6 +734,7 @@ define i32 @load_breg_sext_mul_offreg_1(i32 %a, i64 %b) {
 define i64 @load_sext_shift_offreg_imm1(i32 %a) {
 ; CHECK-LABEL: load_sext_shift_offreg_imm1:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x8, x0, #3, #32
 ; CHECK-NEXT:    ldr x0, [x8, #8]
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/fast-isel-gep.ll b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
index eae90015773004..3dc4771eb01c15 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
@@ -53,6 +53,7 @@ define ptr @test_array4(ptr %a) {
 define ptr @test_array5(ptr %a, i32 %i) {
 ; CHECK-LABEL: test_array5:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    mov x9, #4 ; =0x4
 ; CHECK-NEXT:    madd x0, x8, x9, x0

diff --git a/llvm/test/CodeGen/AArch64/fast-isel-shift.ll b/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
index 750be0b19f2c70..76f81719b4547e 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
@@ -55,6 +55,7 @@ define i64 @asr_zext_i1_i64(i1 %b) {
 define i64 @asr_sext_i1_i64(i1 %b) {
 ; CHECK-LABEL: asr_sext_i1_i64:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x0, x0, #0, #1
 ; CHECK-NEXT:    ret
   %1 = sext i1 %b to i64
@@ -160,6 +161,7 @@ define i32 @lsl_sext_i1_i32(i1 %b) {
 define i64 @lsl_zext_i1_i64(i1 %b) {
 ; CHECK-LABEL: lsl_zext_i1_i64:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x0, x0, #4, #1
 ; CHECK-NEXT:    ret
   %1 = zext i1 %b to i64
@@ -170,6 +172,7 @@ define i64 @lsl_zext_i1_i64(i1 %b) {
 define i64 @lsl_sext_i1_i64(i1 %b) {
 ; CHECK-LABEL: lsl_sext_i1_i64:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x0, x0, #4, #1
 ; CHECK-NEXT:    ret
   %1 = sext i1 %b to i64
@@ -244,6 +247,7 @@ define i32 @lsl_sext_i8_i32(i8 %b) {
 define i64 @lsl_zext_i8_i64(i8 %b) {
 ; CHECK-LABEL: lsl_zext_i8_i64:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x0, x0, #4, #8
 ; CHECK-NEXT:    ret
   %1 = zext i8 %b to i64
@@ -254,6 +258,7 @@ define i64 @lsl_zext_i8_i64(i8 %b) {
 define i64 @lsl_sext_i8_i64(i8 %b) {
 ; CHECK-LABEL: lsl_sext_i8_i64:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x0, x0, #4, #8
 ; CHECK-NEXT:    ret
   %1 = sext i8 %b to i64
@@ -306,6 +311,7 @@ define i32 @lsl_sext_i16_i32(i16 %b) {
 define i64 @lsl_zext_i16_i64(i16 %b) {
 ; CHECK-LABEL: lsl_zext_i16_i64:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x0, x0, #8, #16
 ; CHECK-NEXT:    ret
   %1 = zext i16 %b to i64
@@ -316,6 +322,7 @@ define i64 @lsl_zext_i16_i64(i16 %b) {
 define i64 @lsl_sext_i16_i64(i16 %b) {
 ; CHECK-LABEL: lsl_sext_i16_i64:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x0, x0, #8, #16
 ; CHECK-NEXT:    ret
   %1 = sext i16 %b to i64
@@ -344,6 +351,7 @@ define zeroext i32 @lsl_i32(i32 %a) {
 define i64 @lsl_zext_i32_i64(i32 %b) {
 ; CHECK-LABEL: lsl_zext_i32_i64:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x0, x0, #16, #32
 ; CHECK-NEXT:    ret
   %1 = zext i32 %b to i64
@@ -354,6 +362,7 @@ define i64 @lsl_zext_i32_i64(i32 %b) {
 define i64 @lsl_sext_i32_i64(i32 %b) {
 ; CHECK-LABEL: lsl_sext_i32_i64:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x0, x0, #16, #32
 ; CHECK-NEXT:    ret
   %1 = sext i32 %b to i64
@@ -671,6 +680,7 @@ define i32 @ashr_zero(i32 %a) {
 define i64 @shl_zext_zero(i32 %a) {
 ; CHECK-LABEL: shl_zext_zero:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    lsr w0, w0, #0
 ; CHECK-NEXT:    ret
   %1 = zext i32 %a to i64
@@ -681,6 +691,7 @@ define i64 @shl_zext_zero(i32 %a) {
 define i64 @lshr_zext_zero(i32 %a) {
 ; CHECK-LABEL: lshr_zext_zero:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    lsr w0, w0, #0
 ; CHECK-NEXT:    ret
   %1 = zext i32 %a to i64
@@ -691,6 +702,7 @@ define i64 @lshr_zext_zero(i32 %a) {
 define i64 @ashr_zext_zero(i32 %a) {
 ; CHECK-LABEL: ashr_zext_zero:
 ; CHECK:       ; %bb.0:
+; CHECK-NEXT:    ; kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    lsr w0, w0, #0
 ; CHECK-NEXT:    ret
   %1 = zext i32 %a to i64

diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll
index ac1450bef32102..66f26fc9d85973 100644
--- a/llvm/test/CodeGen/AArch64/fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/fcmp.ll
@@ -247,6 +247,7 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) {
 ; CHECK-SD-NOFP16-NEXT:    cmp w0, #0
 ; CHECK-SD-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    fcsel s0, s9, s8, lt
+; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-SD-NOFP16-NEXT:    ldp d9, d8, [sp], #32 // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
@@ -284,6 +285,7 @@ define half @f128_half(fp128 %a, fp128 %b, half %d, half %e) {
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    csel w8, w8, w9, lt
 ; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp], #32 // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -345,8 +347,11 @@ define half @f16_half(half %a, half %b, half %d, half %e) {
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
 ; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
+; CHECK-SD-NOFP16-NEXT:    // kill: def $h3 killed $h3 def $s3
+; CHECK-SD-NOFP16-NEXT:    // kill: def $h2 killed $h2 def $s2
 ; CHECK-SD-NOFP16-NEXT:    fcmp s0, s1
 ; CHECK-SD-NOFP16-NEXT:    fcsel s0, s2, s3, mi
+; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: f16_half:
@@ -359,20 +364,26 @@ define half @f16_half(half %a, half %b, half %d, half %e) {
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
 ; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h2 killed $h2 def $s2
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h3 killed $h3 def $s3
 ; CHECK-GI-NOFP16-NEXT:    fmov w8, s2
 ; CHECK-GI-NOFP16-NEXT:    fmov w9, s3
 ; CHECK-GI-NOFP16-NEXT:    fcmp s0, s1
 ; CHECK-GI-NOFP16-NEXT:    csel w8, w8, w9, mi
 ; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: f16_half:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $h2 killed $h2 def $s2
+; CHECK-GI-FP16-NEXT:    // kill: def $h3 killed $h3 def $s3
 ; CHECK-GI-FP16-NEXT:    fcmp h0, h1
 ; CHECK-GI-FP16-NEXT:    fmov w8, s2
 ; CHECK-GI-FP16-NEXT:    fmov w9, s3
 ; CHECK-GI-FP16-NEXT:    csel w8, w8, w9, mi
 ; CHECK-GI-FP16-NEXT:    fmov s0, w8
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fcmp olt half %a, %b
@@ -668,10 +679,12 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double>
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 160
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    stp q2, q5, [sp, #112] // 32-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
 ; CHECK-SD-NEXT:    ldr d5, [sp, #184]
-; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    str q3, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp d3, d2, [sp, #168]
+; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    mov v1.16b, v4.16b
@@ -709,6 +722,9 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double>
 ; CHECK-SD-NEXT:    fmov d2, x8
 ; CHECK-SD-NEXT:    bsl v2.16b, v4.16b, v3.16b
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    add sp, sp, #160
 ; CHECK-SD-NEXT:    ret
 ;
@@ -730,6 +746,8 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double>
 ; CHECK-GI-NEXT:    stp q5, q2, [sp, #32] // 32-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr d2, [sp, #184]
 ; CHECK-GI-NEXT:    ldr x20, [sp, #200]
+; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
 ; CHECK-GI-NEXT:    str q7, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    str q2, [sp, #112] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr d2, [sp, #192]
@@ -767,6 +785,7 @@ define <3 x double> @v3f128_double(<3 x fp128> %a, <3 x fp128> %b, <3 x double>
 ; CHECK-GI-NEXT:    orr v0.16b, v1.16b, v0.16b
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #176
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -790,39 +809,57 @@ entry:
 define <3 x double> @v3f64_double(<3 x double> %a, <3 x double> %b, <3 x double> %d, <3 x double> %e) {
 ; CHECK-SD-LABEL: v3f64_double:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    ldr d16, [sp, #24]
+; CHECK-SD-NEXT:    ldr d17, [sp]
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    ldr d16, [sp, #24]
-; CHECK-SD-NEXT:    ldp d1, d4, [sp, #8]
 ; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
+; CHECK-SD-NEXT:    ldp d1, d4, [sp, #8]
 ; CHECK-SD-NEXT:    fcmgt v2.2d, v5.2d, v2.2d
-; CHECK-SD-NEXT:    ldr d17, [sp]
 ; CHECK-SD-NEXT:    mov v1.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    fcmgt v0.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT:    bsl v2.16b, v17.16b, v16.16b
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    bsl v0.16b, v6.16b, v1.16b
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v3f64_double:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-GI-NEXT:    fcmp d2, d5
+; CHECK-GI-NEXT:    ldr x8, [sp]
+; CHECK-GI-NEXT:    ldr x10, [sp, #24]
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    ldr x8, [sp]
-; CHECK-GI-NEXT:    ldp d1, d4, [sp, #8]
 ; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
-; CHECK-GI-NEXT:    fcmp d2, d5
-; CHECK-GI-NEXT:    ldr x10, [sp, #24]
-; CHECK-GI-NEXT:    fcmgt v0.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT:    mov v1.d[1], v4.d[0]
+; CHECK-GI-NEXT:    ldp d1, d4, [sp, #8]
 ; CHECK-GI-NEXT:    cset w9, mi
 ; CHECK-GI-NEXT:    sbfx x9, x9, #0, #1
-; CHECK-GI-NEXT:    bsl v0.16b, v6.16b, v1.16b
+; CHECK-GI-NEXT:    fcmgt v0.2d, v3.2d, v0.2d
+; CHECK-GI-NEXT:    mov v1.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, x9
 ; CHECK-GI-NEXT:    bic x9, x10, x9
 ; CHECK-GI-NEXT:    orr x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    bsl v0.16b, v6.16b, v1.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fcmp olt <3 x double> %a, %b
@@ -868,6 +905,12 @@ entry:
 define <3 x i32> @v3f64_i32(<3 x double> %a, <3 x double> %b, <3 x i32> %d, <3 x i32> %e) {
 ; CHECK-SD-LABEL: v3f64_i32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    fcmgt v1.2d, v5.2d, v2.2d
@@ -878,16 +921,20 @@ define <3 x i32> @v3f64_i32(<3 x double> %a, <3 x double> %b, <3 x i32> %d, <3 x
 ;
 ; CHECK-GI-LABEL: v3f64_i32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    mov w8, #31 // =0x1f
 ; CHECK-GI-NEXT:    fcmp d2, d5
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v1.s[0], w8
-; CHECK-GI-NEXT:    fcmgt v0.2d, v3.2d, v0.2d
 ; CHECK-GI-NEXT:    cset w9, mi
 ; CHECK-GI-NEXT:    mov v2.s[0], w9
-; CHECK-GI-NEXT:    mov v1.s[1], w8
 ; CHECK-GI-NEXT:    mov w9, #-1 // =0xffffffff
+; CHECK-GI-NEXT:    fcmgt v0.2d, v3.2d, v0.2d
+; CHECK-GI-NEXT:    mov v1.s[1], w8
 ; CHECK-GI-NEXT:    mov v3.s[0], w9
 ; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
 ; CHECK-GI-NEXT:    mov v1.s[2], w8

diff --git a/llvm/test/CodeGen/AArch64/fcopysign-noneon.ll b/llvm/test/CodeGen/AArch64/fcopysign-noneon.ll
index 07920220452cd2..b9713b57cef681 100644
--- a/llvm/test/CodeGen/AArch64/fcopysign-noneon.ll
+++ b/llvm/test/CodeGen/AArch64/fcopysign-noneon.ll
@@ -96,7 +96,10 @@ define float @copysign32(float %a, float %b) {
 ; CHECK-LABEL: copysign32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mvni v2.4s, #128, lsl #24
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NONEON-LABEL: copysign32:
@@ -116,8 +119,11 @@ define double @copysign64(double %a, double %b) {
 ; CHECK-LABEL: copysign64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fneg v2.2d, v2.2d
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-NONEON-LABEL: copysign64:

diff --git a/llvm/test/CodeGen/AArch64/fcopysign.ll b/llvm/test/CodeGen/AArch64/fcopysign.ll
index 67eb8428dc23b3..a42ec8e253be29 100644
--- a/llvm/test/CodeGen/AArch64/fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/fcopysign.ll
@@ -6,8 +6,11 @@ define double @copysign_f64(double %a, double %b) {
 ; CHECK-LABEL: copysign_f64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    movi v2.2d, #0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fneg v2.2d, v2.2d
 ; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = call double @llvm.copysign.f64(double %a, double %b)
@@ -18,13 +21,19 @@ define float @copysign_f32(float %a, float %b) {
 ; CHECK-SD-LABEL: copysign_f32:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    mvni v2.4s, #128, lsl #24
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-SD-NEXT:    bif v0.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: copysign_f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mvni v2.2s, #128, lsl #24
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $d0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $d1
 ; CHECK-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call float @llvm.copysign.f32(float %a, float %b)
@@ -44,7 +53,10 @@ define half @copysign_f16(half %a, half %b) {
 ; CHECK-GI-LABEL: copysign_f16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mvni v2.4h, #128, lsl #8
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $d0
+; CHECK-GI-NEXT:    // kill: def $h1 killed $h1 def $d1
 ; CHECK-GI-NEXT:    bif v0.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call half @llvm.copysign.f16(half %a, half %b)
@@ -67,28 +79,42 @@ define <3 x double> @copysign_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: copysign_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    movi v6.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fneg v1.2d, v6.2d
 ; CHECK-SD-NEXT:    bif v0.16b, v3.16b, v1.16b
 ; CHECK-SD-NEXT:    bif v2.16b, v5.16b, v1.16b
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: copysign_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    movi v6.2d, #0xffffffffffffffff
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    fmov x9, d5
-; CHECK-GI-NEXT:    fneg v1.2d, v6.2d
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x7fffffffffffffff
 ; CHECK-GI-NEXT:    and x9, x9, #0x8000000000000000
+; CHECK-GI-NEXT:    fneg v1.2d, v6.2d
 ; CHECK-GI-NEXT:    orr x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    bif v0.16b, v3.16b, v1.16b
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.copysign.v3f64(<3 x double> %a, <3 x double> %b)

diff --git a/llvm/test/CodeGen/AArch64/fcvt.ll b/llvm/test/CodeGen/AArch64/fcvt.ll
index 72d124f8122869..b408e9c1bd4e60 100644
--- a/llvm/test/CodeGen/AArch64/fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt.ll
@@ -66,18 +66,27 @@ entry:
 define <3 x double> @ceil_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: ceil_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frintp v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frintp v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: ceil_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    frintp d2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    frintp v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.ceil.v3f64(<3 x double> %a)
@@ -356,18 +365,27 @@ entry:
 define <3 x double> @floor_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: floor_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frintm v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frintm v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: floor_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    frintm d2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    frintm v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.floor.v3f64(<3 x double> %a)
@@ -646,18 +664,27 @@ entry:
 define <3 x double> @nearbyint_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: nearbyint_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frinti v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frinti v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: nearbyint_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    frinti d2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    frinti v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.nearbyint.v3f64(<3 x double> %a)
@@ -936,18 +963,27 @@ entry:
 define <3 x double> @roundeven_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: roundeven_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frintn v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frintn v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: roundeven_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    frintn d2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    frintn v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.roundeven.v3f64(<3 x double> %a)
@@ -1226,18 +1262,27 @@ entry:
 define <3 x double> @rint_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: rint_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frintx v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frintx v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: rint_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    frintx d2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    frintx v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.rint.v3f64(<3 x double> %a)
@@ -1516,18 +1561,27 @@ entry:
 define <3 x double> @round_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: round_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frinta v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frinta v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: round_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    frinta d2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    frinta v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.round.v3f64(<3 x double> %a)
@@ -1806,18 +1860,27 @@ entry:
 define <3 x double> @trunc_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: trunc_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    frintz v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    frintz v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: trunc_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    frintz d2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    frintz v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.trunc.v3f64(<3 x double> %a)

diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index d9651fe63d28bf..251d7a424175b3 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -305,6 +305,7 @@ define <2 x i32> @test4_sat(<2 x double> %d) {
 ; CHECK-NEXT:    fcvtzs w9, d1
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w9
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %mul.i = fmul <2 x double> %d, <double 16.000000e+00, double 16.000000e+00>
   %vcvt.i = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %mul.i)

diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index ec7d1918caa128..0627250d077912 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -100,6 +100,7 @@ define void @two_fdiv_double(double %D, double %a, double %b) #0 {
 define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
 ; CHECK-LABEL: splat_three_fdiv_4xfloat:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    fmov v4.4s, #1.00000000
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
@@ -119,6 +120,7 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b,
 define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
 ; CHECK-LABEL: splat_fdiv_v4f32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    fmov v2.4s, #1.00000000
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    fdiv v0.4s, v2.4s, v0.4s
@@ -169,8 +171,9 @@ entry:
 define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
 ; CHECK-LABEL: splat_fdiv_nxv2f64:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/fdiv.ll b/llvm/test/CodeGen/AArch64/fdiv.ll
index 5897b2235296b6..5bdccccc62b99c 100644
--- a/llvm/test/CodeGen/AArch64/fdiv.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv.ll
@@ -68,20 +68,34 @@ entry:
 define <3 x double> @fdiv_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: fdiv_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fdiv v2.2d, v2.2d, v5.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fdiv v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fdiv_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    fdiv d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    fdiv d2, d2, d5
 ; CHECK-GI-NEXT:    fdiv v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fdiv <3 x double> %a, %b

diff --git a/llvm/test/CodeGen/AArch64/fexplog.ll b/llvm/test/CodeGen/AArch64/fexplog.ll
index 12ca7b655611c3..f13e2fcd1c4483 100644
--- a/llvm/test/CodeGen/AArch64/fexplog.ll
+++ b/llvm/test/CodeGen/AArch64/fexplog.ll
@@ -69,10 +69,13 @@ define <2 x double> @exp_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -87,11 +90,14 @@ define <2 x double> @exp_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -173,17 +179,22 @@ define <4 x double> @exp_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -204,23 +215,28 @@ define <4 x double> @exp_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl exp
-; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    fmov d2, d1
-; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -236,15 +252,20 @@ define <2 x float> @exp_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -256,12 +277,16 @@ define <2 x float> @exp_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -283,16 +308,20 @@ define <3 x float> @exp_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -310,14 +339,18 @@ define <3 x float> @exp_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -340,22 +373,27 @@ define <4 x float> @exp_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -376,17 +414,22 @@ define <4 x float> @exp_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -412,37 +455,46 @@ define <8 x float> @exp_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl expf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl expf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl expf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -476,31 +528,41 @@ define <8 x float> @exp_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl expf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl expf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -679,6 +741,7 @@ define <4 x half> @exp_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -709,6 +772,7 @@ define <4 x half> @exp_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -723,6 +787,7 @@ define <4 x half> @exp_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -752,7 +817,8 @@ define <4 x half> @exp_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1175,20 +1241,19 @@ define <16 x half> @exp_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl expf
-; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov s3, s1
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
@@ -1307,10 +1372,13 @@ define <2 x double> @exp2_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -1325,11 +1393,14 @@ define <2 x double> @exp2_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp2
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -1411,17 +1482,22 @@ define <4 x double> @exp2_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl exp2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -1442,23 +1518,28 @@ define <4 x double> @exp2_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl exp2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl exp2
-; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    fmov d2, d1
-; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -1474,15 +1555,20 @@ define <2 x float> @exp2_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1494,12 +1580,16 @@ define <2 x float> @exp2_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -1521,16 +1611,20 @@ define <3 x float> @exp2_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -1548,14 +1642,18 @@ define <3 x float> @exp2_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -1578,22 +1676,27 @@ define <4 x float> @exp2_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -1614,17 +1717,22 @@ define <4 x float> @exp2_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -1650,37 +1758,46 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl exp2f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl exp2f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl exp2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -1714,31 +1831,41 @@ define <8 x float> @exp2_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl exp2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl exp2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -1917,6 +2044,7 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -1947,6 +2075,7 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1961,6 +2090,7 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -1990,7 +2120,8 @@ define <4 x half> @exp2_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -2413,20 +2544,19 @@ define <16 x half> @exp2_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl exp2f
-; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov s3, s1
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
@@ -2545,10 +2675,13 @@ define <2 x double> @log_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -2563,11 +2696,14 @@ define <2 x double> @log_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -2649,17 +2785,22 @@ define <4 x double> @log_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -2680,23 +2821,28 @@ define <4 x double> @log_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl log
-; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    fmov d2, d1
-; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -2712,15 +2858,20 @@ define <2 x float> @log_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -2732,12 +2883,16 @@ define <2 x float> @log_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -2759,16 +2914,20 @@ define <3 x float> @log_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -2786,14 +2945,18 @@ define <3 x float> @log_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -2816,22 +2979,27 @@ define <4 x float> @log_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -2852,17 +3020,22 @@ define <4 x float> @log_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -2888,37 +3061,46 @@ define <8 x float> @log_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl logf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl logf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl logf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -2952,31 +3134,41 @@ define <8 x float> @log_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl logf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl logf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -3155,6 +3347,7 @@ define <4 x half> @log_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -3185,6 +3378,7 @@ define <4 x half> @log_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -3199,6 +3393,7 @@ define <4 x half> @log_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -3228,7 +3423,8 @@ define <4 x half> @log_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -3651,20 +3847,19 @@ define <16 x half> @log_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl logf
-; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov s3, s1
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
@@ -3783,10 +3978,13 @@ define <2 x double> @log2_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -3801,11 +3999,14 @@ define <2 x double> @log2_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log2
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -3887,17 +4088,22 @@ define <4 x double> @log2_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log2
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -3918,23 +4124,28 @@ define <4 x double> @log2_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl log2
-; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    fmov d2, d1
-; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -3950,15 +4161,20 @@ define <2 x float> @log2_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -3970,12 +4186,16 @@ define <2 x float> @log2_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -3997,16 +4217,20 @@ define <3 x float> @log2_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -4024,14 +4248,18 @@ define <3 x float> @log2_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -4054,22 +4282,27 @@ define <4 x float> @log2_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -4090,17 +4323,22 @@ define <4 x float> @log2_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -4126,37 +4364,46 @@ define <8 x float> @log2_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log2f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log2f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log2f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -4190,31 +4437,41 @@ define <8 x float> @log2_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl log2f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl log2f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -4393,6 +4650,7 @@ define <4 x half> @log2_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -4423,6 +4681,7 @@ define <4 x half> @log2_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4437,6 +4696,7 @@ define <4 x half> @log2_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -4466,7 +4726,8 @@ define <4 x half> @log2_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -4889,20 +5150,19 @@ define <16 x half> @log2_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl log2f
-; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov s3, s1
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
@@ -5021,10 +5281,13 @@ define <2 x double> @log10_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log10
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log10
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -5039,11 +5302,14 @@ define <2 x double> @log10_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log10
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log10
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -5125,17 +5391,22 @@ define <4 x double> @log10_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log10
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log10
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl log10
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl log10
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -5156,23 +5427,28 @@ define <4 x double> @log10_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log10
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl log10
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl log10
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl log10
-; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    fmov d2, d1
-; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -5188,15 +5464,20 @@ define <2 x float> @log10_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -5208,12 +5489,16 @@ define <2 x float> @log10_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -5235,16 +5520,20 @@ define <3 x float> @log10_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -5262,14 +5551,18 @@ define <3 x float> @log10_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -5292,22 +5585,27 @@ define <4 x float> @log10_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -5328,17 +5626,22 @@ define <4 x float> @log10_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -5364,37 +5667,46 @@ define <8 x float> @log10_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl log10f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl log10f
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl log10f
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -5428,31 +5740,41 @@ define <8 x float> @log10_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl log10f
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl log10f
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -5631,6 +5953,7 @@ define <4 x half> @log10_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -5661,6 +5984,7 @@ define <4 x half> @log10_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -5675,6 +5999,7 @@ define <4 x half> @log10_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -5704,7 +6029,8 @@ define <4 x half> @log10_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -6127,20 +6453,19 @@ define <16 x half> @log10_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl log10f
-; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov s3, s1
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]

diff --git a/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll b/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll
index 6e189c1206c15f..ab7c2afbbf871f 100644
--- a/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-point-conv-vec-pat.ll
@@ -106,6 +106,7 @@ define <8 x half> @h_v8_s8(<8 x i16> %u) #0 {
 define float @l0_extract_f_v2s(<2 x i32> %u) {
 ; CHECK-LABEL: l0_extract_f_v2s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    scvtf s0, s0
 ; CHECK-NEXT:    ret
   %i = extractelement <2 x i32> %u, i64 0
@@ -117,6 +118,7 @@ define float @l0_extract_f_v2s(<2 x i32> %u) {
 define float @l0_extract_f_v2u(<2 x i32> %u) {
 ; CHECK-LABEL: l0_extract_f_v2u:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ucvtf s0, s0
 ; CHECK-NEXT:    ret
   %i = extractelement <2 x i32> %u, i64 0
@@ -245,6 +247,7 @@ define half @ln_extract_h_v8u(<8 x i16> %u, i32 %n) #0 {
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    str q0, [sp]
 ; CHECK-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-NEXT:    ldrh w8, [x8]

diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
index 02f60f5aec33ab..4ab5db450a7f32 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll
@@ -5,10 +5,13 @@
 define {<2 x half>, <2 x half>} @vector_deinterleave_v2f16_v4f16(<4 x half> %vec) {
 ; CHECK-SD-LABEL: vector_deinterleave_v2f16_v4f16:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v2.2s, v0.s[1]
 ; CHECK-SD-NEXT:    mov v1.16b, v2.16b
 ; CHECK-SD-NEXT:    mov v1.h[0], v0.h[1]
 ; CHECK-SD-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vector_deinterleave_v2f16_v4f16:
@@ -26,6 +29,7 @@ define {<4 x half>, <4 x half>} @vector_deinterleave_v4f16_v8f16(<8 x half> %vec
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 v2.8h, v0.8h, v0.8h
 ; CHECK-NEXT:    uzp2 v1.8h, v0.8h, v0.8h
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT:    fmov d0, d2
 ; CHECK-NEXT:    ret
   %retval = call {<4 x half>, <4 x half>} @llvm.vector.deinterleave2.v8f16(<8 x half> %vec)
@@ -56,6 +60,7 @@ define {<2 x float>, <2 x float>} @vector_deinterleave_v2f32_v4f32(<4 x float> %
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    uzp1 v2.4s, v0.4s, v0.4s
 ; CHECK-GI-NEXT:    uzp2 v1.4s, v0.4s, v0.4s
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    fmov d0, d2
 ; CHECK-GI-NEXT:    ret
   %retval = call {<2 x float>, <2 x float>} @llvm.vector.deinterleave2.v4f32(<4 x float> %vec)

diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
index 961df36b11305d..a9618fdc2dec30 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -14,14 +14,18 @@ define <4 x half> @interleave2_v4f16(<2 x half> %vec0, <2 x half> %vec1) {
 define <8 x half> @interleave2_v8f16(<4 x half> %vec0, <4 x half> %vec1) {
 ; CHECK-SD-LABEL: interleave2_v8f16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    adrp x8, .LCPI1_0
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
 ; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: interleave2_v8f16:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    zip1 v0.8h, v0.8h, v1.8h
 ; CHECK-GI-NEXT:    ret
   %retval = call <8 x half> @llvm.vector.interleave2.v8f16(<4 x half> %vec0, <4 x half> %vec1)
@@ -42,6 +46,8 @@ define <16 x half> @interleave2_v16f16(<8 x half> %vec0, <8 x half> %vec1) {
 define <4 x float> @interleave2_v4f32(<2 x float> %vec0, <2 x float> %vec1) {
 ; CHECK-SD-LABEL: interleave2_v4f32:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    rev64 v1.4s, v0.4s
 ; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
@@ -49,6 +55,8 @@ define <4 x float> @interleave2_v4f32(<2 x float> %vec0, <2 x float> %vec1) {
 ;
 ; CHECK-GI-LABEL: interleave2_v4f32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    zip1 v0.4s, v0.4s, v1.4s
 ; CHECK-GI-NEXT:    ret
   %retval = call <4 x float> @llvm.vector.interleave2.v4f32(<2 x float> %vec0, <2 x float> %vec1)

diff --git a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
index c50c00ab4d1a3c..86c1474068482e 100644
--- a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll
@@ -9,6 +9,8 @@ declare <2 x fp128> @llvm.maximum.v2f128(<2 x fp128>, <2 x fp128>)
 define <4 x half> @fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-LABEL: fmaximum_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h2, v1.h[1]
 ; CHECK-NEXT:    mov h3, v0.h[1]
 ; CHECK-NEXT:    mov h4, v1.h[2]
@@ -34,6 +36,7 @@ define <4 x half> @fmaximum_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NEXT:    fcvt h1, s1
 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r = call <4 x half> @llvm.maximum.v4f16(<4 x half> %x, <4 x half> %y)
   ret <4 x half> %r

diff --git a/llvm/test/CodeGen/AArch64/fminimummaximum.ll b/llvm/test/CodeGen/AArch64/fminimummaximum.ll
index 2f608d77bf3893..fb12f8acf17453 100644
--- a/llvm/test/CodeGen/AArch64/fminimummaximum.ll
+++ b/llvm/test/CodeGen/AArch64/fminimummaximum.ll
@@ -129,20 +129,34 @@ entry:
 define <3 x double> @min_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: min_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fmin v2.2d, v2.2d, v5.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmin v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: min_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    fmin d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    fmin d2, d2, d5
 ; CHECK-GI-NEXT:    fmin v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.minimum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -152,20 +166,34 @@ entry:
 define <3 x double> @max_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: max_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fmax v2.2d, v2.2d, v5.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmax v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: max_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    fmax d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    fmax d2, d2, d5
 ; CHECK-GI-NEXT:    fmax v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.maximum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -269,56 +297,76 @@ entry:
 define <7 x float> @min_v7f32(<7 x float> %a, <7 x float> %b) {
 ; CHECK-SD-LABEL: min_v7f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s7 killed $s7 def $q7
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    ldr s1, [sp, #24]
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
+; CHECK-SD-NEXT:    ldr s1, [sp, #24]
 ; CHECK-SD-NEXT:    add x8, sp, #8
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
-; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[2], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #32
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #16
 ; CHECK-SD-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[3], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #40
-; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
+; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-SD-NEXT:    fmin v4.4s, v4.4s, v1.4s
 ; CHECK-SD-NEXT:    fmin v0.4s, v0.4s, v7.4s
 ; CHECK-SD-NEXT:    mov s5, v4.s[1]
 ; CHECK-SD-NEXT:    mov s6, v4.s[2]
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    mov s2, v0.s[2]
 ; CHECK-SD-NEXT:    mov s3, v0.s[3]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: min_v7f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr s16, [sp]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s7 killed $s7 def $q7
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    ldr s17, [sp, #32]
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #8]
-; CHECK-GI-NEXT:    ldr s17, [sp, #32]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[1], v16.s[0]
 ; CHECK-GI-NEXT:    ldr s16, [sp, #24]
-; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    ldr s2, [sp, #40]
 ; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #16]
-; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[3], v1.s[0]
 ; CHECK-GI-NEXT:    fmin v4.4s, v4.4s, v16.4s
 ; CHECK-GI-NEXT:    fmin v0.4s, v0.4s, v7.4s
 ; CHECK-GI-NEXT:    mov s5, v4.s[1]
 ; CHECK-GI-NEXT:    mov s6, v4.s[2]
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    mov s2, v0.s[2]
 ; CHECK-GI-NEXT:    mov s3, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <7 x float> @llvm.minimum.v7f32(<7 x float> %a, <7 x float> %b)
@@ -328,56 +376,76 @@ entry:
 define <7 x float> @max_v7f32(<7 x float> %a, <7 x float> %b) {
 ; CHECK-SD-LABEL: max_v7f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s7 killed $s7 def $q7
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    ldr s1, [sp, #24]
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
+; CHECK-SD-NEXT:    ldr s1, [sp, #24]
 ; CHECK-SD-NEXT:    add x8, sp, #8
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
-; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[2], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #32
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #16
 ; CHECK-SD-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[3], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #40
-; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
+; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-SD-NEXT:    fmax v4.4s, v4.4s, v1.4s
 ; CHECK-SD-NEXT:    fmax v0.4s, v0.4s, v7.4s
 ; CHECK-SD-NEXT:    mov s5, v4.s[1]
 ; CHECK-SD-NEXT:    mov s6, v4.s[2]
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    mov s2, v0.s[2]
 ; CHECK-SD-NEXT:    mov s3, v0.s[3]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: max_v7f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr s16, [sp]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s7 killed $s7 def $q7
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    ldr s17, [sp, #32]
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #8]
-; CHECK-GI-NEXT:    ldr s17, [sp, #32]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[1], v16.s[0]
 ; CHECK-GI-NEXT:    ldr s16, [sp, #24]
-; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    ldr s2, [sp, #40]
 ; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #16]
-; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[3], v1.s[0]
 ; CHECK-GI-NEXT:    fmax v4.4s, v4.4s, v16.4s
 ; CHECK-GI-NEXT:    fmax v0.4s, v0.4s, v7.4s
 ; CHECK-GI-NEXT:    mov s5, v4.s[1]
 ; CHECK-GI-NEXT:    mov s6, v4.s[2]
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    mov s2, v0.s[2]
 ; CHECK-GI-NEXT:    mov s3, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <7 x float> @llvm.maximum.v7f32(<7 x float> %a, <7 x float> %b)
@@ -421,6 +489,8 @@ entry:
 define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-LABEL: min_v4f16:
 ; CHECK-NOFP16-SD:       // %bb.0: // %entry
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP16-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h4, v1.h[2]
@@ -446,6 +516,7 @@ define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NOFP16-SD-NEXT:    fcvt h1, s1
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NOFP16-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: min_v4f16:
@@ -473,6 +544,8 @@ entry:
 define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-LABEL: max_v4f16:
 ; CHECK-NOFP16-SD:       // %bb.0: // %entry
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP16-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h4, v1.h[2]
@@ -498,6 +571,7 @@ define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NOFP16-SD-NEXT:    fcvt h1, s1
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NOFP16-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: max_v4f16:

diff --git a/llvm/test/CodeGen/AArch64/fminmax.ll b/llvm/test/CodeGen/AArch64/fminmax.ll
index 6642ea37374fa9..64f0da8b4cd0f9 100644
--- a/llvm/test/CodeGen/AArch64/fminmax.ll
+++ b/llvm/test/CodeGen/AArch64/fminmax.ll
@@ -129,20 +129,34 @@ entry:
 define <3 x double> @min_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: min_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fminnm v2.2d, v2.2d, v5.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fminnm v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: min_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    fminnm d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    fminnm d2, d2, d5
 ; CHECK-GI-NEXT:    fminnm v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.minnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -152,20 +166,34 @@ entry:
 define <3 x double> @max_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: max_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fmaxnm v2.2d, v2.2d, v5.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmaxnm v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: max_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    fmaxnm d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    fmaxnm d2, d2, d5
 ; CHECK-GI-NEXT:    fmaxnm v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.maxnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -269,56 +297,76 @@ entry:
 define <7 x float> @min_v7f32(<7 x float> %a, <7 x float> %b) {
 ; CHECK-SD-LABEL: min_v7f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s7 killed $s7 def $q7
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    ldr s1, [sp, #24]
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
+; CHECK-SD-NEXT:    ldr s1, [sp, #24]
 ; CHECK-SD-NEXT:    add x8, sp, #8
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
-; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[2], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #32
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #16
 ; CHECK-SD-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[3], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #40
-; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
+; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-SD-NEXT:    fminnm v4.4s, v4.4s, v1.4s
 ; CHECK-SD-NEXT:    fminnm v0.4s, v0.4s, v7.4s
 ; CHECK-SD-NEXT:    mov s5, v4.s[1]
 ; CHECK-SD-NEXT:    mov s6, v4.s[2]
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    mov s2, v0.s[2]
 ; CHECK-SD-NEXT:    mov s3, v0.s[3]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: min_v7f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr s16, [sp]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s7 killed $s7 def $q7
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    ldr s17, [sp, #32]
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #8]
-; CHECK-GI-NEXT:    ldr s17, [sp, #32]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[1], v16.s[0]
 ; CHECK-GI-NEXT:    ldr s16, [sp, #24]
-; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    ldr s2, [sp, #40]
 ; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #16]
-; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[3], v1.s[0]
 ; CHECK-GI-NEXT:    fminnm v4.4s, v4.4s, v16.4s
 ; CHECK-GI-NEXT:    fminnm v0.4s, v0.4s, v7.4s
 ; CHECK-GI-NEXT:    mov s5, v4.s[1]
 ; CHECK-GI-NEXT:    mov s6, v4.s[2]
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    mov s2, v0.s[2]
 ; CHECK-GI-NEXT:    mov s3, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <7 x float> @llvm.minnum.v7f32(<7 x float> %a, <7 x float> %b)
@@ -328,56 +376,76 @@ entry:
 define <7 x float> @max_v7f32(<7 x float> %a, <7 x float> %b) {
 ; CHECK-SD-LABEL: max_v7f32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s7 killed $s7 def $q7
 ; CHECK-SD-NEXT:    mov x8, sp
-; CHECK-SD-NEXT:    ldr s1, [sp, #24]
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[1], [x8]
+; CHECK-SD-NEXT:    ldr s1, [sp, #24]
 ; CHECK-SD-NEXT:    add x8, sp, #8
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
-; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[2], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #32
+; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[1], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #16
 ; CHECK-SD-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-SD-NEXT:    ld1 { v7.s }[3], [x8]
 ; CHECK-SD-NEXT:    add x8, sp, #40
-; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-SD-NEXT:    ld1 { v1.s }[2], [x8]
+; CHECK-SD-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-SD-NEXT:    fmaxnm v4.4s, v4.4s, v1.4s
 ; CHECK-SD-NEXT:    fmaxnm v0.4s, v0.4s, v7.4s
 ; CHECK-SD-NEXT:    mov s5, v4.s[1]
 ; CHECK-SD-NEXT:    mov s6, v4.s[2]
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    mov s2, v0.s[2]
 ; CHECK-SD-NEXT:    mov s3, v0.s[3]
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: max_v7f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    ldr s16, [sp]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s7 killed $s7 def $q7
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    ldr s17, [sp, #32]
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #8]
-; CHECK-GI-NEXT:    ldr s17, [sp, #32]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[1], v16.s[0]
 ; CHECK-GI-NEXT:    ldr s16, [sp, #24]
-; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    mov v16.s[1], v17.s[0]
+; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT:    ldr s2, [sp, #40]
 ; CHECK-GI-NEXT:    mov v4.s[2], v6.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    ldr s1, [sp, #16]
-; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-GI-NEXT:    mov v16.s[2], v2.s[0]
+; CHECK-GI-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-GI-NEXT:    mov v7.s[3], v1.s[0]
 ; CHECK-GI-NEXT:    fmaxnm v4.4s, v4.4s, v16.4s
 ; CHECK-GI-NEXT:    fmaxnm v0.4s, v0.4s, v7.4s
 ; CHECK-GI-NEXT:    mov s5, v4.s[1]
 ; CHECK-GI-NEXT:    mov s6, v4.s[2]
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    mov s2, v0.s[2]
 ; CHECK-GI-NEXT:    mov s3, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <7 x float> @llvm.maxnum.v7f32(<7 x float> %a, <7 x float> %b)
@@ -421,6 +489,8 @@ entry:
 define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-LABEL: min_v4f16:
 ; CHECK-NOFP16-SD:       // %bb.0: // %entry
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP16-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h4, v1.h[2]
@@ -446,6 +516,7 @@ define <4 x half> @min_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NOFP16-SD-NEXT:    fcvt h1, s1
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NOFP16-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: min_v4f16:
@@ -473,6 +544,8 @@ entry:
 define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-LABEL: max_v4f16:
 ; CHECK-NOFP16-SD:       // %bb.0: // %entry
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP16-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-NOFP16-SD-NEXT:    mov h4, v1.h[2]
@@ -498,6 +571,7 @@ define <4 x half> @max_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NOFP16-SD-NEXT:    fcvt h1, s1
 ; CHECK-NOFP16-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NOFP16-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NOFP16-SD-NEXT:    ret
 ;
 ; CHECK-FP16-SD-LABEL: max_v4f16:

diff --git a/llvm/test/CodeGen/AArch64/fmla.ll b/llvm/test/CodeGen/AArch64/fmla.ll
index 0c1ed2c07278dd..7bcaae5a77eac5 100644
--- a/llvm/test/CodeGen/AArch64/fmla.ll
+++ b/llvm/test/CodeGen/AArch64/fmla.ll
@@ -71,19 +71,34 @@ entry:
 define <3 x double> @fma_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
 ; CHECK-SD-LABEL: fma_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    fmla v6.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT:    ldr d3, [sp]
 ; CHECK-SD-NEXT:    fmla v3.2d, v5.2d, v2.2d
-; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
 ; CHECK-SD-NEXT:    fmov d0, d6
+; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    fmov d2, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fma_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
@@ -280,6 +295,9 @@ entry:
 define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; CHECK-SD-NOFP16-LABEL: fma_v4f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h3, v2.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h4, v1.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h5, v0.h[1]
@@ -312,6 +330,7 @@ define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v3.h[0]
 ; CHECK-SD-NOFP16-NEXT:    fcvt h1, s1
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fma_v4f16:
@@ -703,19 +722,34 @@ entry:
 define <3 x double> @fmuladd_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
 ; CHECK-SD-LABEL: fmuladd_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    fmla v6.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT:    ldr d3, [sp]
 ; CHECK-SD-NEXT:    fmla v3.2d, v5.2d, v2.2d
-; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
 ; CHECK-SD-NEXT:    fmov d0, d6
+; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    fmov d2, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fmuladd_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
@@ -1136,19 +1170,34 @@ entry:
 define <3 x double> @fmul_v3f64(<3 x double> %a, <3 x double> %b, <3 x double> %c) {
 ; CHECK-SD-LABEL: fmul_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
 ; CHECK-SD-NEXT:    fmla v6.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT:    ldr d3, [sp]
 ; CHECK-SD-NEXT:    fmla v3.2d, v5.2d, v2.2d
-; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
 ; CHECK-SD-NEXT:    fmov d0, d6
+; CHECK-SD-NEXT:    ext v1.16b, v6.16b, v6.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    fmov d2, d3
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fmul_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]

diff --git a/llvm/test/CodeGen/AArch64/fmul.ll b/llvm/test/CodeGen/AArch64/fmul.ll
index 945bd5209bf4c4..bd3d1353e643e5 100644
--- a/llvm/test/CodeGen/AArch64/fmul.ll
+++ b/llvm/test/CodeGen/AArch64/fmul.ll
@@ -68,20 +68,34 @@ entry:
 define <3 x double> @fmul_v3f64(<3 x double> %a, <3 x double> %b) {
 ; CHECK-SD-LABEL: fmul_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fmul v2.2d, v2.2d, v5.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmul v0.2d, v0.2d, v3.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fmul_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    fmul d2, d2, d5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    fmul d2, d2, d5
 ; CHECK-GI-NEXT:    fmul v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fmul <3 x double> %a, %b

diff --git a/llvm/test/CodeGen/AArch64/fneg.ll b/llvm/test/CodeGen/AArch64/fneg.ll
index 35d91a5bb85a2e..de2671afe60ab7 100644
--- a/llvm/test/CodeGen/AArch64/fneg.ll
+++ b/llvm/test/CodeGen/AArch64/fneg.ll
@@ -27,9 +27,11 @@ entry:
 define half @fabs_f16(half %a) {
 ; CHECK-SD-NOFP16-LABEL: fabs_f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-SD-NOFP16-NEXT:    fmov w8, s0
 ; CHECK-SD-NOFP16-NEXT:    eor w8, w8, #0x8000
 ; CHECK-SD-NOFP16-NEXT:    fmov s0, w8
+; CHECK-SD-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: fabs_f16:
@@ -39,9 +41,11 @@ define half @fabs_f16(half %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fabs_f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-GI-NOFP16-NEXT:    fmov w8, s0
 ; CHECK-GI-NOFP16-NEXT:    eor w8, w8, #0xffff8000
 ; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -66,18 +70,27 @@ entry:
 define <3 x double> @fabs_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: fabs_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fneg v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fneg v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fabs_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    fneg d2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fneg v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fneg <3 x double> %a

diff --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
index f8d1577b5e1fa0..a78addc490086d 100644
--- a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -52,11 +52,13 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
 ; CHECK-NEON-NEXT:    fmov s0, #9.00000000
 ; CHECK-NEON-NEXT:    bl ldexpf
 ; CHECK-NEON-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEON-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEON-NEXT:    fmov s0, #9.00000000
 ; CHECK-NEON-NEXT:    fmov w0, s1
 ; CHECK-NEON-NEXT:    bl ldexpf
 ; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEON-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEON-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEON-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -64,6 +66,7 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
 ; CHECK-NEON-NEXT:    fmov s0, #9.00000000
 ; CHECK-NEON-NEXT:    bl ldexpf
 ; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEON-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEON-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEON-NEXT:    mov w0, v0.s[3]
@@ -71,6 +74,7 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
 ; CHECK-NEON-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-NEON-NEXT:    bl ldexpf
 ; CHECK-NEON-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEON-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEON-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEON-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-NEON-NEXT:    mov v0.16b, v1.16b
@@ -220,6 +224,7 @@ define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #2 // =0x2
 ; CHECK-NEXT:    mov w9, #1 // =0x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    fmov d1, #3.00000000
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    lsl w9, w9, w0
@@ -428,6 +433,7 @@ define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
 ; CHECK-LABEL: fmul_pow_shl_cnt_safe:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    and w8, w8, #0xffff
 ; CHECK-NEXT:    ucvtf d0, w8
@@ -535,6 +541,7 @@ define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
 ; CHECK-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #3936146074321813504 // =0x36a0000000000000
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x8, x8, x0, lsl #52
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    ret
@@ -578,6 +585,7 @@ define fastcc i1 @quantum_hadamard(i32 %0) {
 ; CHECK-LABEL: quantum_hadamard:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sub x8, x8, x0, lsl #52
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    fcvt s0, d0

diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
index 40efc7389b04d2..d9d80f1cb50ee1 100644
--- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
@@ -39,9 +39,9 @@ define void @fptoui_v8f32_to_v8i8_in_loop(ptr %A, ptr %dst) {
 ; CHECK-NEXT:    add x8, x8, #1
 ; CHECK-NEXT:    cmp x8, #1000
 ; CHECK-NEXT:    ldp q2, q1, [x9]
-; CHECK-NEXT:    fcvtzu.4s v3, v1
-; CHECK-NEXT:    fcvtzu.4s v2, v2
-; CHECK-NEXT:    tbl.16b v1, { v2, v3 }, v0
+; CHECK-NEXT:    fcvtzu.4s v4, v1
+; CHECK-NEXT:    fcvtzu.4s v3, v2
+; CHECK-NEXT:    tbl.16b v1, { v3, v4 }, v0
 ; CHECK-NEXT:    str d1, [x1], #16
 ; CHECK-NEXT:    b.eq LBB0_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
@@ -252,12 +252,12 @@ define void @fptoui_v16f32_to_v16i8_in_loop(ptr %A, ptr %dst) {
 ; CHECK-NEXT:    add x8, x8, #1
 ; CHECK-NEXT:    cmp x8, #1000
 ; CHECK-NEXT:    ldp q2, q1, [x9, #32]
-; CHECK-NEXT:    fcvtzu.4s v5, v1
+; CHECK-NEXT:    fcvtzu.4s v7, v1
 ; CHECK-NEXT:    ldp q1, q3, [x9]
-; CHECK-NEXT:    fcvtzu.4s v4, v2
-; CHECK-NEXT:    fcvtzu.4s v3, v3
-; CHECK-NEXT:    fcvtzu.4s v2, v1
-; CHECK-NEXT:    tbl.16b v1, { v2, v3, v4, v5 }, v0
+; CHECK-NEXT:    fcvtzu.4s v6, v2
+; CHECK-NEXT:    fcvtzu.4s v5, v3
+; CHECK-NEXT:    fcvtzu.4s v4, v1
+; CHECK-NEXT:    tbl.16b v1, { v4, v5, v6, v7 }, v0
 ; CHECK-NEXT:    str q1, [x1], #32
 ; CHECK-NEXT:    b.eq LBB4_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
@@ -316,20 +316,20 @@ define void @fptoui_2x_v16f32_to_v16i8_in_loop(ptr %A, ptr %B, ptr %dst) {
 ; CHECK-NEXT:    ldp q3, q4, [x9, #32]
 ; CHECK-NEXT:    ldp q5, q6, [x10]
 ; CHECK-NEXT:    fcvtzu.4s v19, v1
-; CHECK-NEXT:    ldp q7, q1, [x9]
-; CHECK-NEXT:    fcvtzu.4s v4, v4
 ; CHECK-NEXT:    fcvtzu.4s v18, v2
-; CHECK-NEXT:    fcvtzu.4s v3, v3
+; CHECK-NEXT:    ldp q2, q1, [x9]
+; CHECK-NEXT:    fcvtzu.4s v23, v4
 ; CHECK-NEXT:    fcvtzu.4s v17, v6
-; CHECK-NEXT:    fcvtzu.4s v16, v5
 ; CHECK-NEXT:    add x9, x2, x8, lsl #5
-; CHECK-NEXT:    fcvtzu.4s v2, v1
-; CHECK-NEXT:    fcvtzu.4s v1, v7
+; CHECK-NEXT:    fcvtzu.4s v22, v3
+; CHECK-NEXT:    fcvtzu.4s v16, v5
 ; CHECK-NEXT:    add x8, x8, #1
+; CHECK-NEXT:    fcvtzu.4s v21, v1
 ; CHECK-NEXT:    cmp x8, #1000
-; CHECK-NEXT:    tbl.16b v5, { v16, v17, v18, v19 }, v0
-; CHECK-NEXT:    tbl.16b v1, { v1, v2, v3, v4 }, v0
-; CHECK-NEXT:    stp q1, q5, [x9]
+; CHECK-NEXT:    fcvtzu.4s v20, v2
+; CHECK-NEXT:    tbl.16b v1, { v16, v17, v18, v19 }, v0
+; CHECK-NEXT:    tbl.16b v2, { v20, v21, v22, v23 }, v0
+; CHECK-NEXT:    stp q2, q1, [x9]
 ; CHECK-NEXT:    b.eq LBB5_1
 ; CHECK-NEXT:  ; %bb.2: ; %exit
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
index 97f092d5bf9e84..83e60c10897624 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
@@ -702,6 +702,7 @@ define <1 x i64> @fptoui_v1i64_v1f64(<1 x double> %x) #0 {
 define <1 x double> @sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
 ; CHECK-LABEL: sitofp_v1f64_v1i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    scvtf d0, w8
 ; CHECK-NEXT:    ret
@@ -712,6 +713,7 @@ define <1 x double> @sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
 define <1 x double> @uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
 ; CHECK-LABEL: uitofp_v1f64_v1i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    ucvtf d0, w8
 ; CHECK-NEXT:    ret
@@ -722,6 +724,7 @@ define <1 x double> @uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
 define <1 x double> @sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
 ; CHECK-LABEL: sitofp_v1f64_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    scvtf d0, x8
 ; CHECK-NEXT:    ret
@@ -732,6 +735,7 @@ define <1 x double> @sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
 define <1 x double> @uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
 ; CHECK-LABEL: uitofp_v1f64_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    ucvtf d0, x8
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
index 53afad929622fa..bb3f9a3e52a16b 100644
--- a/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
+++ b/llvm/test/CodeGen/AArch64/fp-maximumnum-minimumnum.ll
@@ -25,11 +25,20 @@ entry:
 define <3 x double> @max_nnan_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-LABEL: max_nnan_v3f64:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $d3 killed $d3 def $q3
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 def $q0
+; AARCH64-NEXT:    // kill: def $d4 killed $d4 def $q4
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 def $q1
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 def $q2
+; AARCH64-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; AARCH64-NEXT:    mov v3.d[1], v4.d[0]
 ; AARCH64-NEXT:    mov v0.d[1], v1.d[0]
 ; AARCH64-NEXT:    fmaxnm v2.2d, v2.2d, v5.2d
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; AARCH64-NEXT:    fmaxnm v0.2d, v0.2d, v3.2d
 ; AARCH64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; AARCH64-NEXT:    ret
 entry:
   %c = call nnan <3 x double> @llvm.maximumnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -91,19 +100,29 @@ entry:
 define <5 x float> @max_nnan_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-LABEL: max_nnan_v5f32:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 def $q0
+; AARCH64-NEXT:    // kill: def $s5 killed $s5 def $q5
+; AARCH64-NEXT:    // kill: def $s1 killed $s1 def $q1
+; AARCH64-NEXT:    // kill: def $s6 killed $s6 def $q6
+; AARCH64-NEXT:    // kill: def $s2 killed $s2 def $q2
+; AARCH64-NEXT:    // kill: def $s7 killed $s7 def $q7
+; AARCH64-NEXT:    // kill: def $s3 killed $s3 def $q3
+; AARCH64-NEXT:    mov x8, sp
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; AARCH64-NEXT:    mov v0.s[1], v1.s[0]
 ; AARCH64-NEXT:    mov v5.s[1], v6.s[0]
-; AARCH64-NEXT:    mov x8, sp
 ; AARCH64-NEXT:    mov v0.s[2], v2.s[0]
 ; AARCH64-NEXT:    mov v5.s[2], v7.s[0]
 ; AARCH64-NEXT:    ldr s2, [sp, #8]
 ; AARCH64-NEXT:    fmaxnm v4.4s, v4.4s, v2.4s
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; AARCH64-NEXT:    mov v0.s[3], v3.s[0]
 ; AARCH64-NEXT:    ld1 { v5.s }[3], [x8]
 ; AARCH64-NEXT:    fmaxnm v0.4s, v0.4s, v5.4s
 ; AARCH64-NEXT:    mov s1, v0.s[1]
 ; AARCH64-NEXT:    mov s2, v0.s[2]
 ; AARCH64-NEXT:    mov s3, v0.s[3]
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; AARCH64-NEXT:    ret
 entry:
   %c = call nnan <5 x float> @llvm.maximumnum.v5f32(<5 x float> %a, <5 x float> %b)
@@ -165,9 +184,17 @@ entry:
 define <9 x half> @max_nnan_v9f16(<9 x half> %a, <9 x half> %b) {
 ; AARCH64-LABEL: max_nnan_v9f16:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $h0 killed $h0 def $q0
+; AARCH64-NEXT:    // kill: def $h1 killed $h1 def $q1
+; AARCH64-NEXT:    // kill: def $h2 killed $h2 def $q2
+; AARCH64-NEXT:    add x9, sp, #16
+; AARCH64-NEXT:    // kill: def $h3 killed $h3 def $q3
+; AARCH64-NEXT:    // kill: def $h4 killed $h4 def $q4
+; AARCH64-NEXT:    // kill: def $h5 killed $h5 def $q5
+; AARCH64-NEXT:    // kill: def $h6 killed $h6 def $q6
+; AARCH64-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; AARCH64-NEXT:    mov v0.h[1], v1.h[0]
 ; AARCH64-NEXT:    ldr h1, [sp, #8]
-; AARCH64-NEXT:    add x9, sp, #16
 ; AARCH64-NEXT:    ld1 { v1.h }[1], [x9]
 ; AARCH64-NEXT:    add x9, sp, #24
 ; AARCH64-NEXT:    mov v0.h[2], v2.h[0]
@@ -234,11 +261,20 @@ entry:
 define <3 x double> @min_nnan_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-LABEL: min_nnan_v3f64:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $d3 killed $d3 def $q3
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 def $q0
+; AARCH64-NEXT:    // kill: def $d4 killed $d4 def $q4
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 def $q1
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 def $q2
+; AARCH64-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; AARCH64-NEXT:    mov v3.d[1], v4.d[0]
 ; AARCH64-NEXT:    mov v0.d[1], v1.d[0]
 ; AARCH64-NEXT:    fminnm v2.2d, v2.2d, v5.2d
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; AARCH64-NEXT:    fminnm v0.2d, v0.2d, v3.2d
 ; AARCH64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; AARCH64-NEXT:    ret
 entry:
   %c = call nnan <3 x double> @llvm.minimumnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -300,19 +336,29 @@ entry:
 define <5 x float> @min_nnan_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-LABEL: min_nnan_v5f32:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 def $q0
+; AARCH64-NEXT:    // kill: def $s5 killed $s5 def $q5
+; AARCH64-NEXT:    // kill: def $s1 killed $s1 def $q1
+; AARCH64-NEXT:    // kill: def $s6 killed $s6 def $q6
+; AARCH64-NEXT:    // kill: def $s2 killed $s2 def $q2
+; AARCH64-NEXT:    // kill: def $s7 killed $s7 def $q7
+; AARCH64-NEXT:    // kill: def $s3 killed $s3 def $q3
+; AARCH64-NEXT:    mov x8, sp
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; AARCH64-NEXT:    mov v0.s[1], v1.s[0]
 ; AARCH64-NEXT:    mov v5.s[1], v6.s[0]
-; AARCH64-NEXT:    mov x8, sp
 ; AARCH64-NEXT:    mov v0.s[2], v2.s[0]
 ; AARCH64-NEXT:    mov v5.s[2], v7.s[0]
 ; AARCH64-NEXT:    ldr s2, [sp, #8]
 ; AARCH64-NEXT:    fminnm v4.4s, v4.4s, v2.4s
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; AARCH64-NEXT:    mov v0.s[3], v3.s[0]
 ; AARCH64-NEXT:    ld1 { v5.s }[3], [x8]
 ; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v5.4s
 ; AARCH64-NEXT:    mov s1, v0.s[1]
 ; AARCH64-NEXT:    mov s2, v0.s[2]
 ; AARCH64-NEXT:    mov s3, v0.s[3]
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; AARCH64-NEXT:    ret
 entry:
   %c = call nnan <5 x float> @llvm.minimumnum.v5f32(<5 x float> %a, <5 x float> %b)
@@ -374,9 +420,17 @@ entry:
 define <9 x half> @min_nnan_v9f16(<9 x half> %a, <9 x half> %b) {
 ; AARCH64-LABEL: min_nnan_v9f16:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $h0 killed $h0 def $q0
+; AARCH64-NEXT:    // kill: def $h1 killed $h1 def $q1
+; AARCH64-NEXT:    // kill: def $h2 killed $h2 def $q2
+; AARCH64-NEXT:    add x9, sp, #16
+; AARCH64-NEXT:    // kill: def $h3 killed $h3 def $q3
+; AARCH64-NEXT:    // kill: def $h4 killed $h4 def $q4
+; AARCH64-NEXT:    // kill: def $h5 killed $h5 def $q5
+; AARCH64-NEXT:    // kill: def $h6 killed $h6 def $q6
+; AARCH64-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; AARCH64-NEXT:    mov v0.h[1], v1.h[0]
 ; AARCH64-NEXT:    ldr h1, [sp, #8]
-; AARCH64-NEXT:    add x9, sp, #16
 ; AARCH64-NEXT:    ld1 { v1.h }[1], [x9]
 ; AARCH64-NEXT:    add x9, sp, #24
 ; AARCH64-NEXT:    mov v0.h[2], v2.h[0]
@@ -447,6 +501,12 @@ entry:
 define <3 x double> @max_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-LABEL: max_v3f64:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $d3 killed $d3 def $q3
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 def $q0
+; AARCH64-NEXT:    // kill: def $d4 killed $d4 def $q4
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 def $q1
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 def $q2
+; AARCH64-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; AARCH64-NEXT:    mov v0.d[1], v1.d[0]
 ; AARCH64-NEXT:    mov v3.d[1], v4.d[0]
 ; AARCH64-NEXT:    fminnm v2.2d, v2.2d, v2.2d
@@ -456,6 +516,9 @@ define <3 x double> @max_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-NEXT:    fminnm v1.2d, v5.2d, v5.2d
 ; AARCH64-NEXT:    fmaxnm v2.2d, v2.2d, v1.2d
 ; AARCH64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; AARCH64-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.maximumnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -529,9 +592,17 @@ entry:
 define <5 x float> @max_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-LABEL: max_v5f32:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 def $q0
+; AARCH64-NEXT:    // kill: def $s5 killed $s5 def $q5
+; AARCH64-NEXT:    // kill: def $s1 killed $s1 def $q1
+; AARCH64-NEXT:    // kill: def $s6 killed $s6 def $q6
+; AARCH64-NEXT:    // kill: def $s2 killed $s2 def $q2
+; AARCH64-NEXT:    // kill: def $s7 killed $s7 def $q7
+; AARCH64-NEXT:    // kill: def $s3 killed $s3 def $q3
+; AARCH64-NEXT:    mov x8, sp
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; AARCH64-NEXT:    mov v0.s[1], v1.s[0]
 ; AARCH64-NEXT:    mov v5.s[1], v6.s[0]
-; AARCH64-NEXT:    mov x8, sp
 ; AARCH64-NEXT:    mov v0.s[2], v2.s[0]
 ; AARCH64-NEXT:    mov v5.s[2], v7.s[0]
 ; AARCH64-NEXT:    ldr s2, [sp, #8]
@@ -542,10 +613,12 @@ define <5 x float> @max_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-NEXT:    fminnm v1.4s, v5.4s, v5.4s
 ; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v0.4s
 ; AARCH64-NEXT:    fmaxnm v4.4s, v3.4s, v2.4s
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; AARCH64-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
 ; AARCH64-NEXT:    mov s1, v0.s[1]
 ; AARCH64-NEXT:    mov s2, v0.s[2]
 ; AARCH64-NEXT:    mov s3, v0.s[3]
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; AARCH64-NEXT:    ret
 entry:
   %c = call <5 x float> @llvm.maximumnum.v5f32(<5 x float> %a, <5 x float> %b)
@@ -619,9 +692,17 @@ entry:
 define <9 x half> @max_v9f16(<9 x half> %a, <9 x half> %b) {
 ; AARCH64-LABEL: max_v9f16:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $h0 killed $h0 def $q0
+; AARCH64-NEXT:    // kill: def $h1 killed $h1 def $q1
+; AARCH64-NEXT:    // kill: def $h2 killed $h2 def $q2
+; AARCH64-NEXT:    add x9, sp, #16
+; AARCH64-NEXT:    // kill: def $h3 killed $h3 def $q3
+; AARCH64-NEXT:    // kill: def $h4 killed $h4 def $q4
+; AARCH64-NEXT:    // kill: def $h5 killed $h5 def $q5
+; AARCH64-NEXT:    // kill: def $h6 killed $h6 def $q6
+; AARCH64-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; AARCH64-NEXT:    mov v0.h[1], v1.h[0]
 ; AARCH64-NEXT:    ldr h1, [sp, #8]
-; AARCH64-NEXT:    add x9, sp, #16
 ; AARCH64-NEXT:    ld1 { v1.h }[1], [x9]
 ; AARCH64-NEXT:    add x9, sp, #24
 ; AARCH64-NEXT:    mov v0.h[2], v2.h[0]
@@ -700,6 +781,12 @@ entry:
 define <3 x double> @min_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-LABEL: min_v3f64:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $d3 killed $d3 def $q3
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 def $q0
+; AARCH64-NEXT:    // kill: def $d4 killed $d4 def $q4
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 def $q1
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 def $q2
+; AARCH64-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; AARCH64-NEXT:    mov v0.d[1], v1.d[0]
 ; AARCH64-NEXT:    mov v3.d[1], v4.d[0]
 ; AARCH64-NEXT:    fminnm v2.2d, v2.2d, v2.2d
@@ -709,6 +796,9 @@ define <3 x double> @min_v3f64(<3 x double> %a, <3 x double> %b) {
 ; AARCH64-NEXT:    fminnm v1.2d, v5.2d, v5.2d
 ; AARCH64-NEXT:    fminnm v2.2d, v2.2d, v1.2d
 ; AARCH64-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; AARCH64-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; AARCH64-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; AARCH64-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; AARCH64-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.minimumnum.v3f64(<3 x double> %a, <3 x double> %b)
@@ -782,9 +872,17 @@ entry:
 define <5 x float> @min_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-LABEL: min_v5f32:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 def $q0
+; AARCH64-NEXT:    // kill: def $s5 killed $s5 def $q5
+; AARCH64-NEXT:    // kill: def $s1 killed $s1 def $q1
+; AARCH64-NEXT:    // kill: def $s6 killed $s6 def $q6
+; AARCH64-NEXT:    // kill: def $s2 killed $s2 def $q2
+; AARCH64-NEXT:    // kill: def $s7 killed $s7 def $q7
+; AARCH64-NEXT:    // kill: def $s3 killed $s3 def $q3
+; AARCH64-NEXT:    mov x8, sp
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; AARCH64-NEXT:    mov v0.s[1], v1.s[0]
 ; AARCH64-NEXT:    mov v5.s[1], v6.s[0]
-; AARCH64-NEXT:    mov x8, sp
 ; AARCH64-NEXT:    mov v0.s[2], v2.s[0]
 ; AARCH64-NEXT:    mov v5.s[2], v7.s[0]
 ; AARCH64-NEXT:    ldr s2, [sp, #8]
@@ -795,10 +893,12 @@ define <5 x float> @min_v5f32(<5 x float> %a, <5 x float> %b) {
 ; AARCH64-NEXT:    fminnm v1.4s, v5.4s, v5.4s
 ; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v0.4s
 ; AARCH64-NEXT:    fminnm v4.4s, v3.4s, v2.4s
+; AARCH64-NEXT:    // kill: def $s4 killed $s4 killed $q4
 ; AARCH64-NEXT:    fminnm v0.4s, v0.4s, v1.4s
 ; AARCH64-NEXT:    mov s1, v0.s[1]
 ; AARCH64-NEXT:    mov s2, v0.s[2]
 ; AARCH64-NEXT:    mov s3, v0.s[3]
+; AARCH64-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; AARCH64-NEXT:    ret
 entry:
   %c = call <5 x float> @llvm.minimumnum.v5f32(<5 x float> %a, <5 x float> %b)
@@ -872,9 +972,17 @@ entry:
 define <9 x half> @min_v9f16(<9 x half> %a, <9 x half> %b) {
 ; AARCH64-LABEL: min_v9f16:
 ; AARCH64:       // %bb.0: // %entry
+; AARCH64-NEXT:    // kill: def $h0 killed $h0 def $q0
+; AARCH64-NEXT:    // kill: def $h1 killed $h1 def $q1
+; AARCH64-NEXT:    // kill: def $h2 killed $h2 def $q2
+; AARCH64-NEXT:    add x9, sp, #16
+; AARCH64-NEXT:    // kill: def $h3 killed $h3 def $q3
+; AARCH64-NEXT:    // kill: def $h4 killed $h4 def $q4
+; AARCH64-NEXT:    // kill: def $h5 killed $h5 def $q5
+; AARCH64-NEXT:    // kill: def $h6 killed $h6 def $q6
+; AARCH64-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; AARCH64-NEXT:    mov v0.h[1], v1.h[0]
 ; AARCH64-NEXT:    ldr h1, [sp, #8]
-; AARCH64-NEXT:    add x9, sp, #16
 ; AARCH64-NEXT:    ld1 { v1.h }[1], [x9]
 ; AARCH64-NEXT:    add x9, sp, #24
 ; AARCH64-NEXT:    mov v0.h[2], v2.h[0]

diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index a0b67f11c485fd..d4130e7a848b15 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -427,6 +427,7 @@ define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
 define void @test_insert_at_zero(half %a, ptr %b) #0 {
 ; CHECK-LABEL: test_insert_at_zero:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
   %1 = insertelement <8 x half> undef, half %a, i64 0

diff --git a/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
index 6aee101ebd2638..97c3a4937cda74 100644
--- a/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-vector-shuffle.ll
@@ -41,7 +41,10 @@ entry:
 define <4 x half> @lane_64_64(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-LABEL: lane_64_64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[1], v1.h[2]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
@@ -54,6 +57,7 @@ entry:
 define <8 x half> @lane_128_64(<8 x half> %a, <4 x half> %b) #0 {
 ; CHECK-LABEL: lane_128_64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.h[1], v1.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -71,7 +75,9 @@ entry:
 define <4 x half> @lane_64_128(<4 x half> %a, <8 x half> %b) #0 {
 ; CHECK-LABEL: lane_64_128:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[5]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = bitcast <8 x half> %b to <8 x i16>
@@ -188,6 +194,7 @@ entry:
 define <4 x half> @dup_64(half %a) #0 {
 ; CHECK-LABEL: dup_64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -202,6 +209,7 @@ entry:
 define <8 x half> @dup_128(half %a) #0 {
 ; CHECK-LABEL: dup_128:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -220,6 +228,7 @@ entry:
 define <4 x half> @dup_lane_64(<4 x half> %a) #0 {
 ; CHECK-LABEL: dup_lane_64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -231,6 +240,7 @@ entry:
 define <8 x half> @dup_lane_128(<4 x half> %a) #0 {
 ; CHECK-LABEL: dup_lane_128:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -264,6 +274,8 @@ entry:
 define <8 x half> @vcombine(<4 x half> %a, <4 x half> %b) #0 {
 ; CHECK-LABEL: vcombine:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -276,6 +288,7 @@ define <4 x half> @get_high(<8 x half> %a) #0 {
 ; CHECK-LABEL: get_high:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -287,6 +300,7 @@ entry:
 define <4 x half> @get_low(<8 x half> %a) #0 {
 ; CHECK-LABEL: get_low:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -297,8 +311,11 @@ entry:
 define <4 x half> @set_lane_64(<4 x half> %a, half %b) #0 {
 ; CHECK-LABEL: set_lane_64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    fmov w8, s1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[2], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %0 = bitcast half %b to i16
@@ -313,6 +330,7 @@ entry:
 define <8 x half> @set_lane_128(<8 x half> %a, half %b) #0 {
 ; CHECK-LABEL: set_lane_128:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $s1
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    mov v0.h[2], w8
 ; CHECK-NEXT:    ret
@@ -328,8 +346,10 @@ entry:
 define half @get_lane_64(<4 x half> %a) #0 {
 ; CHECK-LABEL: get_lane_64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.h[2]
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %0 = bitcast <4 x half> %a to <4 x i16>
@@ -344,6 +364,7 @@ define half @get_lane_128(<8 x half> %a) #0 {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    umov w8, v0.h[2]
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %0 = bitcast <8 x half> %a to <8 x i16>

diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
index 8e1d67b750743a..368683e2b93af4 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
@@ -11,6 +11,7 @@ declare half @llvm.fma.f16(half, half, half) #1
 define <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfma_lane_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -22,6 +23,7 @@ entry:
 define <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmaq_lane_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -55,6 +57,7 @@ entry:
 define <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
 ; CHECK-LABEL: t_vfma_n_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    fmla v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -67,6 +70,7 @@ entry:
 define <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
 ; CHECK-LABEL: t_vfmaq_n_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    fmla v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -79,6 +83,7 @@ entry:
 define half @t_vfmah_lane_f16_0(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmah_lane_f16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd h0, h1, h2, h0
 ; CHECK-NEXT:    ret
 entry:
@@ -90,6 +95,7 @@ entry:
 define half @t_vfmah_lane_f16_0_swap(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmah_lane_f16_0_swap:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd h0, h2, h1, h0
 ; CHECK-NEXT:    ret
 entry:
@@ -101,6 +107,7 @@ entry:
 define half @t_vfmah_lane_f16_3(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmah_lane_f16_3:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla h0, h1, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -112,6 +119,7 @@ entry:
 define half @t_vfmah_lane_f16_3_0(half %a, <4 x half> %c) {
 ; CHECK-LABEL: t_vfmah_lane_f16_3_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmla h0, h1, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -124,6 +132,8 @@ entry:
 define half @t_vfmah_lane_f16_0_0(half %a, <4 x half> %b, <4 x half> %c) {
 ; CHECK-LABEL: t_vfmah_lane_f16_0_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmadd h0, h1, h2, h0
 ; CHECK-NEXT:    ret
 entry:
@@ -169,6 +179,7 @@ entry:
 define <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfms_lane_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -181,6 +192,7 @@ entry:
 define <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsq_lane_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -217,6 +229,7 @@ entry:
 define <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
 ; CHECK-LABEL: t_vfms_n_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    fmls v0.4h, v1.4h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -230,6 +243,7 @@ entry:
 define <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
 ; CHECK-LABEL: t_vfmsq_n_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    fmls v0.8h, v1.8h, v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -243,6 +257,7 @@ entry:
 define half @t_vfmsh_lane_f16_0(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsh_lane_f16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub h0, h2, h1, h0
 ; CHECK-NEXT:    ret
 entry:
@@ -255,6 +270,7 @@ entry:
 define half @t_vfmsh_lane_f16_0_swap(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsh_lane_f16_0_swap:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub h0, h2, h1, h0
 ; CHECK-NEXT:    ret
 entry:
@@ -267,6 +283,7 @@ entry:
 define half @t_vfmsh_lane_f16_3(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsh_lane_f16_3:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls h0, h1, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -291,6 +308,7 @@ entry:
 define half @t_vfmsh_lane_f16_0_3(half %a, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsh_lane_f16_0_3:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmls h0, h1, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -350,6 +368,7 @@ entry:
 define half @t_vmulh_lane0_f16(half %a, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vmulh_lane0_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul h0, h0, h1
 ; CHECK-NEXT:    ret
 entry:
@@ -361,6 +380,7 @@ entry:
 define half @t_vmulh_lane3_f16(half %a, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vmulh_lane3_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmul h0, h0, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -404,6 +424,7 @@ entry:
 define half @t_vmulxh_lane0_f16(half %a, <4 x half> %b) {
 ; CHECK-LABEL: t_vmulxh_lane0_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx h0, h0, h1
 ; CHECK-NEXT:    ret
 entry:
@@ -415,6 +436,7 @@ entry:
 define half @t_vmulxh_lane3_f16(half %a, <4 x half> %b, i32 %lane) {
 ; CHECK-LABEL: t_vmulxh_lane3_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx h0, h0, v1.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -426,6 +448,7 @@ entry:
 define <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) {
 ; CHECK-LABEL: t_vmulx_lane_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -437,6 +460,7 @@ entry:
 define <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) {
 ; CHECK-LABEL: t_vmulxq_lane_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -492,6 +516,7 @@ entry:
 define <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) {
 ; CHECK-LABEL: t_vmulx_n_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    dup v1.4h, v1.h[0]
 ; CHECK-NEXT:    fmulx v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT:    ret
@@ -505,6 +530,7 @@ entry:
 define <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) {
 ; CHECK-LABEL: t_vmulxq_n_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    dup v1.8h, v1.h[0]
 ; CHECK-NEXT:    fmulx v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ret
@@ -518,6 +544,7 @@ entry:
 define half @t_vfmah_lane3_f16(half %a, half %b, <4 x half> %c) {
 ; CHECK-LABEL: t_vfmah_lane3_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla h0, h1, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -540,6 +567,7 @@ entry:
 define half @t_vfmsh_lane3_f16(half %a, half %b, <4 x half> %c) {
 ; CHECK-LABEL: t_vfmsh_lane3_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmls h0, h1, v2.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -577,6 +605,7 @@ entry:
 define half @test_fmulx_horizontal_f16(<2 x half> %v) {
 ; CHECK-LABEL: test_fmulx_horizontal_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmulx h0, h0, v0.h[1]
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
index ccea80cd396bc3..2ffba10e211007 100644
--- a/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
+++ b/llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
@@ -7,6 +7,8 @@ target triple = "aarch64-linux"
 define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2) {
 ; CHECK-LABEL: cvtn_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfcvtn z0.b, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
     %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2)
@@ -16,6 +18,8 @@ define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bf
 define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2) {
 ; CHECK-LABEL: cvtn_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvtn z0.b, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
     %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2)
@@ -25,6 +29,8 @@ define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half>
 define <vscale x 16 x i8> @cvtnb_f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2) {
 ; CHECK-LABEL: cvtnb_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvtnb z0.b, { z0.s, z1.s }
 ; CHECK-NEXT:    ret
     %r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2)

diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index 85c9a0cd881322..9157bcba59e9bb 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -28,6 +28,7 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) {
 ; CHECK-NEXT:    fcvtzu w9, d1
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w9
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <2 x double> %x to <2 x i64>
@@ -352,6 +353,7 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixunsdfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -383,6 +385,7 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
@@ -444,12 +447,14 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -480,7 +485,9 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
@@ -519,6 +526,7 @@ entry:
 define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-CVT-LABEL: stest_f16i64:
 ; CHECK-CVT:       // %bb.0: // %entry
+; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    fcvt s1, h1
@@ -530,6 +538,7 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ;
 ; CHECK-FP16-LABEL: stest_f16i64:
 ; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-FP16-NEXT:    fcvtzs x8, h0
 ; CHECK-FP16-NEXT:    fcvtzs x9, h1
@@ -556,12 +565,14 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    bl __fixunshfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    bl __fixunshfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -592,7 +603,9 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    bl __fixhfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
@@ -654,6 +667,7 @@ define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
 ; CHECK-NEXT:    fcvtzu w9, d1
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w9
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <2 x double> %x to <2 x i64>
@@ -948,6 +962,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixunsdfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -978,6 +993,7 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl __fixdfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
@@ -1033,12 +1049,14 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixunssfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -1068,7 +1086,9 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl __fixsfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
@@ -1103,6 +1123,7 @@ entry:
 define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ; CHECK-CVT-LABEL: stest_f16i64_mm:
 ; CHECK-CVT:       // %bb.0: // %entry
+; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-CVT-NEXT:    fcvt s0, h0
 ; CHECK-CVT-NEXT:    fcvt s1, h1
@@ -1114,6 +1135,7 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) {
 ;
 ; CHECK-FP16-LABEL: stest_f16i64_mm:
 ; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-FP16-NEXT:    fcvtzs x8, h0
 ; CHECK-FP16-NEXT:    fcvtzs x9, h1
@@ -1138,12 +1160,14 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov h0, v0.h[1]
 ; CHECK-NEXT:    bl __fixunshfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    bl __fixunshfti
 ; CHECK-NEXT:    cmp x1, #0
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -1173,7 +1197,9 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    bl __fixhfti
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov x19, x0

diff --git a/llvm/test/CodeGen/AArch64/fpext.ll b/llvm/test/CodeGen/AArch64/fpext.ll
index 2715f3d2f26b6a..df90f9d5f09109 100644
--- a/llvm/test/CodeGen/AArch64/fpext.ll
+++ b/llvm/test/CodeGen/AArch64/fpext.ll
@@ -74,8 +74,10 @@ define <3 x double> @fpext_v3f32_v3f64(<3 x float> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v3.2d, v0.2s
 ; CHECK-SD-NEXT:    fcvtl2 v2.2d, v0.4s
-; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fpext_v3f32_v3f64:
@@ -84,6 +86,7 @@ define <3 x double> @fpext_v3f32_v3f64(<3 x float> %a) {
 ; CHECK-GI-NEXT:    fcvtl v0.2d, v0.2s
 ; CHECK-GI-NEXT:    fcvt d2, s1
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fpext <3 x float> %a to <3 x double>
@@ -97,7 +100,9 @@ define <4 x fp128> @fpext_v4f16_v4f128(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __extendhftf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
@@ -132,9 +137,11 @@ define <4 x fp128> @fpext_v4f16_v4f128(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-GI-NEXT:    bl __extendhftf2
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
@@ -168,6 +175,7 @@ define <4 x fp128> @fpext_v4f32_v4f128(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __extendsftf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
@@ -176,6 +184,7 @@ define <4 x fp128> @fpext_v4f32_v4f128(<4 x float> %a) {
 ; CHECK-SD-NEXT:    bl __extendsftf2
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __extendsftf2
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -202,6 +211,7 @@ define <4 x fp128> @fpext_v4f32_v4f128(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __extendsftf2
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
@@ -234,6 +244,7 @@ define <4 x fp128> @fpext_v4f64_v4f128(<4 x double> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
 ; CHECK-SD-NEXT:    str q1, [sp, #48] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __extenddftf2
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -241,6 +252,7 @@ define <4 x fp128> @fpext_v4f64_v4f128(<4 x double> %a) {
 ; CHECK-SD-NEXT:    bl __extenddftf2
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __extenddftf2
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -265,12 +277,14 @@ define <4 x fp128> @fpext_v4f64_v4f128(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __extenddftf2
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl __extenddftf2
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __extenddftf2
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
@@ -314,6 +328,7 @@ define <2 x double> @fpext_v2f16_v2f64(<2 x half> %a) {
 ;
 ; CHECK-GI-LABEL: fpext_v2f16_v2f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    fcvt d0, h0
 ; CHECK-GI-NEXT:    fcvt d1, h1
@@ -330,11 +345,15 @@ define <3 x double> @fpext_v3f16_v3f64(<3 x half> %a) {
 ; CHECK-SD-NEXT:    fcvtl v1.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtl v0.2d, v1.2s
 ; CHECK-SD-NEXT:    fcvtl2 v2.2d, v1.4s
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fpext_v3f16_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-NEXT:    fcvt d0, h0
@@ -356,6 +375,7 @@ define <4 x double> @fpext_v4f16_v4f64(<4 x half> %a) {
 ;
 ; CHECK-GI-LABEL: fpext_v4f16_v4f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-NEXT:    mov h3, v0.h[3]
@@ -375,6 +395,7 @@ define <2 x float> @fpext_v2f16_v2f32(<2 x half> %a) {
 ; CHECK-LABEL: fpext_v2f16_v2f32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = fpext <2 x half> %a to <2 x float>

diff --git a/llvm/test/CodeGen/AArch64/fpmode.ll b/llvm/test/CodeGen/AArch64/fpmode.ll
index 509bd5ca1d749a..b185d9e0494111 100644
--- a/llvm/test/CodeGen/AArch64/fpmode.ll
+++ b/llvm/test/CodeGen/AArch64/fpmode.ll
@@ -10,6 +10,7 @@ define i32 @func_get_fpmode() #0 {
 ; DAG-LABEL: func_get_fpmode:
 ; DAG:       // %bb.0: // %entry
 ; DAG-NEXT:    mrs x0, FPCR
+; DAG-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; DAG-NEXT:    ret
 ;
 ; GIS-LABEL: func_get_fpmode:

diff --git a/llvm/test/CodeGen/AArch64/fpow.ll b/llvm/test/CodeGen/AArch64/fpow.ll
index 4eeeb4d8fafeec..dc93d5be9b3f38 100644
--- a/llvm/test/CodeGen/AArch64/fpow.ll
+++ b/llvm/test/CodeGen/AArch64/fpow.ll
@@ -72,10 +72,14 @@ define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl pow
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #64
@@ -92,12 +96,16 @@ define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    fmov d1, d9
+; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl pow
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -198,18 +206,25 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #48] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d1, v2.d[1]
 ; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl pow
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl pow
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl pow
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp, #16] // 32-byte Folded Reload
@@ -237,28 +252,35 @@ define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d10, v2.d[1]
 ; CHECK-GI-NEXT:    mov d11, v3.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    mov d9, v4.d[1]
 ; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    fmov d1, d10
+; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl pow
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    fmov d1, d11
+; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl pow
-; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp q3, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    fmov d2, d1
-; CHECK-GI-NEXT:    ldp q3, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #112
 ; CHECK-GI-NEXT:    ret
@@ -274,16 +296,23 @@ define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -296,14 +325,20 @@ define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    .cfi_offset b8, -24
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, s9
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -326,10 +361,14 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -337,6 +376,7 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -357,18 +397,23 @@ define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b11, -48
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s10, v1.s[1]
 ; CHECK-GI-NEXT:    mov s11, v1.s[2]
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, s10
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
@@ -393,10 +438,14 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -404,6 +453,7 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -411,6 +461,7 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[3]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -436,22 +487,28 @@ define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    fmov s1, s12
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    fmov s1, s13
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
@@ -480,10 +537,14 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #32] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s1, v2.s[1]
 ; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
@@ -491,6 +552,7 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
@@ -498,16 +560,21 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[3]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl powf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
@@ -515,6 +582,7 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl powf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
@@ -552,6 +620,7 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v3.s[1]
 ; CHECK-GI-NEXT:    mov s11, v3.s[2]
 ; CHECK-GI-NEXT:    mov s2, v4.s[1]
@@ -560,39 +629,50 @@ define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-GI-NEXT:    mov s14, v1.s[1]
 ; CHECK-GI-NEXT:    mov s15, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    str s2, [sp, #48] // 4-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s2, v4.s[2]
 ; CHECK-GI-NEXT:    str s2, [sp, #112] // 4-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s2, v3.s[3]
 ; CHECK-GI-NEXT:    stp s2, s5, [sp, #200] // 8-byte Folded Spill
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, s14
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    fmov s1, s15
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    fmov s1, s13
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    fmov s1, s12
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #48] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #112] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl powf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp s1, s0, [sp, #200] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    bl powf
 ; CHECK-GI-NEXT:    ldp q3, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #176] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #192] // 8-byte Folded Reload
@@ -814,6 +894,8 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #16] // 32-byte Folded Spill
@@ -853,6 +935,7 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -871,6 +954,8 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b11, -48
 ; CHECK-GI-NEXT:    .cfi_offset b12, -56
 ; CHECK-GI-NEXT:    .cfi_offset b13, -64
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -908,7 +993,8 @@ define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-GI-NEXT:    ldp d13, d12, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #112
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1472,23 +1558,22 @@ define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) {
 ; CHECK-GI-NEXT:    fcvt s1, h0
 ; CHECK-GI-NEXT:    fmov s0, s2
 ; CHECK-GI-NEXT:    bl powf
-; CHECK-GI-NEXT:    ldr q1, [sp, #304] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #304] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp x29, x30, [sp, #432] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov s3, s1
 ; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #416] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #400] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #384] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x29, x30, [sp, #432] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #368] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #416] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #320] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #400] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #128] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #384] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #272] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #368] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[3], v2.h[0]

diff --git a/llvm/test/CodeGen/AArch64/fpowi.ll b/llvm/test/CodeGen/AArch64/fpowi.ll
index f5def57c1cfb90..8948556d1b380a 100644
--- a/llvm/test/CodeGen/AArch64/fpowi.ll
+++ b/llvm/test/CodeGen/AArch64/fpowi.ll
@@ -71,12 +71,15 @@ define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __powidf2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __powidf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
@@ -92,12 +95,15 @@ define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __powidf2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl __powidf2
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -189,20 +195,25 @@ define <4 x double> @powi_v4f64(<4 x double> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __powidf2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __powidf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl __powidf2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __powidf2
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -225,26 +236,31 @@ define <4 x double> @powi_v4f64(<4 x double> %a, i32 %b) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
 ; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __powidf2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl __powidf2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __powidf2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl __powidf2
-; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov d2, d1
-; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -261,17 +277,22 @@ define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov w19, w0
+; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl __powisf2
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -284,14 +305,18 @@ define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) {
 ; CHECK-GI-NEXT:    .cfi_offset w19, -8
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl __powisf2
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -315,11 +340,14 @@ define <3 x float> @powi_v3f32(<3 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __powisf2
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -327,6 +355,7 @@ define <3 x float> @powi_v3f32(<3 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -346,16 +375,20 @@ define <3 x float> @powi_v3f32(<3 x float> %a, i32 %b) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov w19, w0
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl __powisf2
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -380,11 +413,14 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __powisf2
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -392,6 +428,7 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -399,6 +436,7 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -421,20 +459,25 @@ define <4 x float> @powi_v4f32(<4 x float> %a, i32 %b) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov w19, w0
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl __powisf2
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
@@ -462,11 +505,14 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov w19, w0
 ; CHECK-SD-NEXT:    bl __powisf2
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
@@ -474,6 +520,7 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -481,17 +528,21 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __powisf2
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w0, w19
+; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -499,6 +550,7 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl __powisf2
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov w0, w19
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
@@ -537,36 +589,46 @@ define <8 x float> @powi_v8f32(<8 x float> %a, i32 %b) {
 ; CHECK-GI-NEXT:    mov w19, w0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl __powisf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    mov w0, w19
+; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl __powisf2
 ; CHECK-GI-NEXT:    ldp q3, q2, [sp, #32] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #160] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
@@ -763,6 +825,7 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) {
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov w19, w0
@@ -797,6 +860,7 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -812,6 +876,7 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -24
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    .cfi_offset b10, -48
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov w19, w0
@@ -845,7 +910,8 @@ define <4 x half> @powi_v4f16(<4 x half> %a, i32 %b) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #96
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1321,21 +1387,20 @@ define <16 x half> @powi_v16f16(<16 x half> %a, i32 %b) {
 ; CHECK-GI-NEXT:    str q0, [sp, #176] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl __powisf2
-; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x29, [sp, #304] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #320] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov s3, s1
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 5df93979877ecf..9c4f0207b84ce8 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -707,18 +707,29 @@ entry:
 define <3 x i64> @fptos_v3f64_v3i64(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptos_v3f64_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v3f64_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x double> %a to <3 x i64>
@@ -728,18 +739,29 @@ entry:
 define <3 x i64> @fptou_v3f64_v3i64(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptou_v3f64_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v3f64_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x double> %a to <3 x i64>
@@ -995,6 +1017,9 @@ entry:
 define <3 x i32> @fptos_v3f64_v3i32(<3 x double> %a) {
 ; CHECK-LABEL: fptos_v3f64_v3i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
@@ -1008,6 +1033,9 @@ entry:
 define <3 x i32> @fptou_v3f64_v3i32(<3 x double> %a) {
 ; CHECK-LABEL: fptou_v3f64_v3i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    fcvtzu v1.2d, v2.2d
 ; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
@@ -1349,6 +1377,9 @@ entry:
 define <3 x i16> @fptos_v3f64_v3i16(<3 x double> %a) {
 ; CHECK-LABEL: fptos_v3f64_v3i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
@@ -1363,6 +1394,9 @@ entry:
 define <3 x i16> @fptou_v3f64_v3i16(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptou_v3f64_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
@@ -1372,6 +1406,9 @@ define <3 x i16> @fptou_v3f64_v3i16(<3 x double> %a) {
 ;
 ; CHECK-GI-LABEL: fptou_v3f64_v3i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
 ; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
@@ -1433,12 +1470,12 @@ define <8 x i16> @fptos_v8f64_v8i16(<8 x double> %a) {
 ; CHECK-SD-NEXT:    adrp x8, .LCPI70_0
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI70_0]
-; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
-; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
-; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
-; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT:    xtn v6.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v0.2d
+; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI70_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v8f64_v8i16:
@@ -1464,12 +1501,12 @@ define <8 x i16> @fptou_v8f64_v8i16(<8 x double> %a) {
 ; CHECK-SD-NEXT:    adrp x8, .LCPI71_0
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI71_0]
-; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
-; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
-; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
-; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT:    xtn v6.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v5.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v4.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v3.2s, v0.2d
+; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI71_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v8f64_v8i16:
@@ -1495,21 +1532,21 @@ define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) {
 ; CHECK-SD-NEXT:    adrp x8, .LCPI72_0
 ; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
 ; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI72_0]
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
-; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
-; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
-; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
-; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
-; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
-; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
-; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-SD-NEXT:    xtn v19.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v23.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v18.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v22.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v17.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v21.2s, v5.2d
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI72_0]
+; CHECK-SD-NEXT:    xtn v16.2s, v0.2d
+; CHECK-SD-NEXT:    xtn v20.2s, v4.2d
+; CHECK-SD-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v16f64_v16i16:
@@ -1542,21 +1579,21 @@ define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) {
 ; CHECK-SD-NEXT:    adrp x8, .LCPI73_0
 ; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
 ; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI73_0]
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
-; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
-; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
-; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
-; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
-; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
-; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
-; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-SD-NEXT:    xtn v19.2s, v3.2d
+; CHECK-SD-NEXT:    xtn v23.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v18.2s, v2.2d
+; CHECK-SD-NEXT:    xtn v22.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v17.2s, v1.2d
+; CHECK-SD-NEXT:    xtn v21.2s, v5.2d
+; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI73_0]
+; CHECK-SD-NEXT:    xtn v16.2s, v0.2d
+; CHECK-SD-NEXT:    xtn v20.2s, v4.2d
+; CHECK-SD-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v16f64_v16i16:
@@ -1584,48 +1621,65 @@ entry:
 define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) {
 ; CHECK-SD-LABEL: fptos_v32f64_v32i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ldp q16, q17, [sp, #64]
+; CHECK-SD-NEXT:    stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset b8, -8
+; CHECK-SD-NEXT:    .cfi_offset b9, -16
+; CHECK-SD-NEXT:    .cfi_offset b10, -24
+; CHECK-SD-NEXT:    .cfi_offset b11, -32
+; CHECK-SD-NEXT:    .cfi_offset b12, -40
+; CHECK-SD-NEXT:    .cfi_offset b13, -48
+; CHECK-SD-NEXT:    .cfi_offset b14, -56
+; CHECK-SD-NEXT:    .cfi_offset b15, -64
 ; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-SD-NEXT:    ldp q18, q19, [sp, #96]
-; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-SD-NEXT:    ldp q20, q21, [sp, #32]
-; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-SD-NEXT:    ldp q22, q23, [sp]
-; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v2.2d
+; CHECK-SD-NEXT:    adrp x8, .LCPI74_0
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v1.2d
+; CHECK-SD-NEXT:    ldp q20, q21, [sp, #160]
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v0.2d
+; CHECK-SD-NEXT:    ldp q23, q24, [sp, #96]
 ; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-SD-NEXT:    fcvtzs v19.2d, v19.2d
+; CHECK-SD-NEXT:    ldp q16, q17, [sp, #128]
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
 ; CHECK-SD-NEXT:    fcvtzs v21.2d, v21.2d
-; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
 ; CHECK-SD-NEXT:    fcvtzs v20.2d, v20.2d
-; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-SD-NEXT:    fcvtzs v23.2d, v23.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v18.2d
+; CHECK-SD-NEXT:    ldp q18, q25, [sp, #64]
+; CHECK-SD-NEXT:    xtn v1.2s, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v24.2d
 ; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT:    fcvtzs v22.2d, v22.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v22.2d
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v23.2d
+; CHECK-SD-NEXT:    xtn v29.2s, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v25.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
 ; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
-; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
-; CHECK-SD-NEXT:    adrp x8, .LCPI74_0
-; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
-; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
-; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
-; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
-; CHECK-SD-NEXT:    xtn v21.2s, v21.2d
-; CHECK-SD-NEXT:    xtn v25.2s, v19.2d
-; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
-; CHECK-SD-NEXT:    xtn v20.2s, v20.2d
-; CHECK-SD-NEXT:    xtn v24.2s, v18.2d
-; CHECK-SD-NEXT:    xtn v19.2s, v23.2d
-; CHECK-SD-NEXT:    xtn v23.2s, v17.2d
-; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
-; CHECK-SD-NEXT:    xtn v18.2s, v22.2d
-; CHECK-SD-NEXT:    xtn v22.2s, v16.2d
-; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI74_0]
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
-; CHECK-SD-NEXT:    tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b
-; CHECK-SD-NEXT:    tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    xtn v15.2s, v21.2d
+; CHECK-SD-NEXT:    xtn v11.2s, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    xtn v14.2s, v20.2d
+; CHECK-SD-NEXT:    xtn v10.2s, v22.2d
+; CHECK-SD-NEXT:    xtn v13.2s, v17.2d
+; CHECK-SD-NEXT:    xtn v9.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v28.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v8.2s, v18.2d
+; CHECK-SD-NEXT:    xtn v12.2s, v16.2d
+; CHECK-SD-NEXT:    xtn v27.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v26.2s, v4.2d
+; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI74_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT:    tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b
+; CHECK-SD-NEXT:    tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b
+; CHECK-SD-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d15, d14, [sp], #64 // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v32f64_v32i16:
@@ -1671,48 +1725,65 @@ entry:
 define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) {
 ; CHECK-SD-LABEL: fptou_v32f64_v32i16:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    ldp q16, q17, [sp, #64]
+; CHECK-SD-NEXT:    stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-SD-NEXT:    .cfi_offset b8, -8
+; CHECK-SD-NEXT:    .cfi_offset b9, -16
+; CHECK-SD-NEXT:    .cfi_offset b10, -24
+; CHECK-SD-NEXT:    .cfi_offset b11, -32
+; CHECK-SD-NEXT:    .cfi_offset b12, -40
+; CHECK-SD-NEXT:    .cfi_offset b13, -48
+; CHECK-SD-NEXT:    .cfi_offset b14, -56
+; CHECK-SD-NEXT:    .cfi_offset b15, -64
 ; CHECK-SD-NEXT:    fcvtzs v3.2d, v3.2d
-; CHECK-SD-NEXT:    ldp q18, q19, [sp, #96]
-; CHECK-SD-NEXT:    fcvtzs v2.2d, v2.2d
-; CHECK-SD-NEXT:    ldp q20, q21, [sp, #32]
-; CHECK-SD-NEXT:    fcvtzs v1.2d, v1.2d
-; CHECK-SD-NEXT:    ldp q22, q23, [sp]
-; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v2.2d
+; CHECK-SD-NEXT:    adrp x8, .LCPI75_0
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v1.2d
+; CHECK-SD-NEXT:    ldp q20, q21, [sp, #160]
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v0.2d
+; CHECK-SD-NEXT:    ldp q23, q24, [sp, #96]
 ; CHECK-SD-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
-; CHECK-SD-NEXT:    fcvtzs v19.2d, v19.2d
+; CHECK-SD-NEXT:    ldp q16, q17, [sp, #128]
+; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
 ; CHECK-SD-NEXT:    fcvtzs v21.2d, v21.2d
-; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
 ; CHECK-SD-NEXT:    fcvtzs v20.2d, v20.2d
-; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
-; CHECK-SD-NEXT:    fcvtzs v23.2d, v23.2d
+; CHECK-SD-NEXT:    xtn v2.2s, v18.2d
+; CHECK-SD-NEXT:    ldp q18, q25, [sp, #64]
+; CHECK-SD-NEXT:    xtn v1.2s, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v19.2d, v24.2d
 ; CHECK-SD-NEXT:    fcvtzs v17.2d, v17.2d
-; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
-; CHECK-SD-NEXT:    fcvtzs v22.2d, v22.2d
+; CHECK-SD-NEXT:    xtn v0.2s, v22.2d
+; CHECK-SD-NEXT:    fcvtzs v22.2d, v23.2d
+; CHECK-SD-NEXT:    xtn v29.2s, v7.2d
+; CHECK-SD-NEXT:    fcvtzs v7.2d, v25.2d
+; CHECK-SD-NEXT:    fcvtzs v6.2d, v6.2d
+; CHECK-SD-NEXT:    fcvtzs v18.2d, v18.2d
 ; CHECK-SD-NEXT:    fcvtzs v16.2d, v16.2d
-; CHECK-SD-NEXT:    xtn v3.2s, v3.2d
-; CHECK-SD-NEXT:    xtn v2.2s, v2.2d
-; CHECK-SD-NEXT:    adrp x8, .LCPI75_0
-; CHECK-SD-NEXT:    xtn v1.2s, v1.2d
-; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
-; CHECK-SD-NEXT:    xtn v7.2s, v7.2d
-; CHECK-SD-NEXT:    xtn v6.2s, v6.2d
-; CHECK-SD-NEXT:    xtn v21.2s, v21.2d
-; CHECK-SD-NEXT:    xtn v25.2s, v19.2d
-; CHECK-SD-NEXT:    xtn v5.2s, v5.2d
-; CHECK-SD-NEXT:    xtn v20.2s, v20.2d
-; CHECK-SD-NEXT:    xtn v24.2s, v18.2d
-; CHECK-SD-NEXT:    xtn v19.2s, v23.2d
-; CHECK-SD-NEXT:    xtn v23.2s, v17.2d
-; CHECK-SD-NEXT:    xtn v4.2s, v4.2d
-; CHECK-SD-NEXT:    xtn v18.2s, v22.2d
-; CHECK-SD-NEXT:    xtn v22.2s, v16.2d
-; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI75_0]
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
-; CHECK-SD-NEXT:    tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b
-; CHECK-SD-NEXT:    tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b
+; CHECK-SD-NEXT:    fcvtzs v5.2d, v5.2d
+; CHECK-SD-NEXT:    xtn v15.2s, v21.2d
+; CHECK-SD-NEXT:    xtn v11.2s, v19.2d
+; CHECK-SD-NEXT:    fcvtzs v4.2d, v4.2d
+; CHECK-SD-NEXT:    xtn v14.2s, v20.2d
+; CHECK-SD-NEXT:    xtn v10.2s, v22.2d
+; CHECK-SD-NEXT:    xtn v13.2s, v17.2d
+; CHECK-SD-NEXT:    xtn v9.2s, v7.2d
+; CHECK-SD-NEXT:    xtn v28.2s, v6.2d
+; CHECK-SD-NEXT:    xtn v8.2s, v18.2d
+; CHECK-SD-NEXT:    xtn v12.2s, v16.2d
+; CHECK-SD-NEXT:    xtn v27.2s, v5.2d
+; CHECK-SD-NEXT:    xtn v26.2s, v4.2d
+; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI75_0]
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT:    tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b
+; CHECK-SD-NEXT:    tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b
+; CHECK-SD-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b
+; CHECK-SD-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    ldp d15, d14, [sp], #64 // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v32f64_v32i16:
@@ -1786,6 +1857,9 @@ entry:
 define <3 x i8> @fptos_v3f64_v3i8(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptos_v3f64_v3i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
@@ -1798,13 +1872,19 @@ define <3 x i8> @fptos_v3f64_v3i8(<3 x double> %a) {
 ;
 ; CHECK-GI-LABEL: fptos_v3f64_v3i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
 ; CHECK-GI-NEXT:    mov d2, v0.d[1]
 ; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-GI-NEXT:    fmov x1, d2
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x double> %a to <3 x i8>
@@ -1814,6 +1894,9 @@ entry:
 define <3 x i8> @fptou_v3f64_v3i8(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptou_v3f64_v3i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtzs v1.2d, v2.2d
 ; CHECK-SD-NEXT:    fcvtzs v0.2d, v0.2d
@@ -1826,13 +1909,19 @@ define <3 x i8> @fptou_v3f64_v3i8(<3 x double> %a) {
 ;
 ; CHECK-GI-LABEL: fptou_v3f64_v3i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
 ; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
 ; CHECK-GI-NEXT:    mov d2, v0.d[1]
 ; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-GI-NEXT:    fmov x1, d2
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x double> %a to <3 x i8>
@@ -2198,6 +2287,7 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixdfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -2224,6 +2314,7 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -24
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixdfti
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -2253,6 +2344,7 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunsdfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -2279,6 +2371,7 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -24
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunsdfti
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -2484,8 +2577,10 @@ define <3 x i64> @fptos_v3f32_v3i64(<3 x float> %a) {
 ; CHECK-SD-NEXT:    fcvtl2 v0.2d, v0.4s
 ; CHECK-SD-NEXT:    fcvtzs v3.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzs v2.2d, v0.2d
-; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v3f32_v3i64:
@@ -2496,6 +2591,8 @@ define <3 x i64> @fptos_v3f32_v3i64(<3 x float> %a) {
 ; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-NEXT:    fcvtzs v2.2d, v1.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptosi <3 x float> %a to <3 x i64>
@@ -2509,8 +2606,10 @@ define <3 x i64> @fptou_v3f32_v3i64(<3 x float> %a) {
 ; CHECK-SD-NEXT:    fcvtl2 v0.2d, v0.4s
 ; CHECK-SD-NEXT:    fcvtzu v3.2d, v1.2d
 ; CHECK-SD-NEXT:    fcvtzu v2.2d, v0.2d
-; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v3f32_v3i64:
@@ -2521,6 +2620,8 @@ define <3 x i64> @fptou_v3f32_v3i64(<3 x float> %a) {
 ; CHECK-GI-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-GI-NEXT:    fcvtzu v2.2d, v1.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptoui <3 x float> %a to <3 x i64>
@@ -3565,7 +3666,9 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -3591,7 +3694,9 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w20, -16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -24
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -3620,7 +3725,9 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -3646,7 +3753,9 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w20, -16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -24
 ; CHECK-GI-NEXT:    .cfi_offset b8, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -3680,10 +3789,12 @@ define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x21, x0
@@ -3718,6 +3829,7 @@ define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -64
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -3758,10 +3870,12 @@ define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
 ; CHECK-SD-NEXT:    mov x20, x1
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x21, x0
@@ -3796,6 +3910,7 @@ define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -64
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    mov x19, x0
@@ -3824,6 +3939,7 @@ entry:
 define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptos_v2f16_v2i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -3835,6 +3951,7 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptos_v2f16_v2i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    fcvtzs x8, h0
 ; CHECK-SD-FP16-NEXT:    fcvtzs x9, h1
@@ -3851,6 +3968,7 @@ define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvt d0, h0
 ; CHECK-GI-FP16-NEXT:    fcvt d1, h1
@@ -3865,6 +3983,7 @@ entry:
 define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptou_v2f16_v2i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -3876,6 +3995,7 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptou_v2f16_v2i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    fcvtzu x8, h0
 ; CHECK-SD-FP16-NEXT:    fcvtzu x9, h1
@@ -3892,6 +4012,7 @@ define <2 x i64> @fptou_v2f16_v2i64(<2 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvt d0, h0
 ; CHECK-GI-FP16-NEXT:    fcvt d1, h1
@@ -3906,6 +4027,7 @@ entry:
 define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptos_v3f16_v3i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
@@ -3921,6 +4043,7 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptos_v3f16_v3i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    fcvtzs x8, h0
@@ -3938,11 +4061,14 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.2d, v0.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v0.2d, v1.2d
 ; CHECK-GI-NOFP16-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NOFP16-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvt d1, h0
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[2]
@@ -3950,10 +4076,12 @@ define <3 x i64> @fptos_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt d2, h2
 ; CHECK-GI-FP16-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-GI-FP16-NEXT:    fcvt d2, h3
-; CHECK-GI-FP16-NEXT:    mov v2.d[1], v1.d[0]
 ; CHECK-GI-FP16-NEXT:    fcvtzs v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v1.d[0]
 ; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-FP16-NEXT:    fcvtzs v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <3 x half> %a to <3 x i64>
@@ -3963,6 +4091,7 @@ entry:
 define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptou_v3f16_v3i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s0, h0
@@ -3978,6 +4107,7 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptou_v3f16_v3i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    fcvtzu x8, h0
@@ -3995,11 +4125,14 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-GI-NOFP16-NEXT:    fcvtl2 v2.2d, v0.4s
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v0.2d, v1.2d
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NOFP16-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvt d1, h0
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[2]
@@ -4007,10 +4140,12 @@ define <3 x i64> @fptou_v3f16_v3i64(<3 x half> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt d2, h2
 ; CHECK-GI-FP16-NEXT:    mov v0.d[1], v2.d[0]
 ; CHECK-GI-FP16-NEXT:    fcvt d2, h3
-; CHECK-GI-FP16-NEXT:    mov v2.d[1], v1.d[0]
 ; CHECK-GI-FP16-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-GI-FP16-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    mov v2.d[1], v1.d[0]
 ; CHECK-GI-FP16-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-FP16-NEXT:    fcvtzu v2.2d, v2.2d
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <3 x half> %a to <3 x i64>
@@ -4020,6 +4155,7 @@ entry:
 define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptos_v4f16_v4i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[3]
@@ -4039,6 +4175,7 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptos_v4f16_v4i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
@@ -4063,6 +4200,7 @@ define <4 x i64> @fptos_v4f16_v4i64(<4 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v4f16_v4i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
@@ -4083,6 +4221,7 @@ entry:
 define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: fptou_v4f16_v4i64:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-NOFP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[3]
@@ -4102,6 +4241,7 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) {
 ;
 ; CHECK-SD-FP16-LABEL: fptou_v4f16_v4i64:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
@@ -4126,6 +4266,7 @@ define <4 x i64> @fptou_v4f16_v4i64(<4 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v4f16_v4i64:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
@@ -5556,6 +5697,7 @@ define <2 x i32> @fptos_v2f16_v2i32(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptos_v2f16_v2i32:
@@ -5573,6 +5715,7 @@ define <2 x i32> @fptou_v2f16_v2i32(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptou_v2f16_v2i32:
@@ -5824,6 +5967,7 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i16:
@@ -5836,6 +5980,7 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) {
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <2 x half> %a to <2 x i16>
@@ -5847,6 +5992,7 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i16:
@@ -5859,6 +6005,7 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) {
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <2 x half> %a to <2 x i16>
@@ -6280,6 +6427,7 @@ define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i8:
@@ -6292,6 +6440,7 @@ define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) {
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzs v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptosi <2 x half> %a to <2 x i8>
@@ -6303,6 +6452,7 @@ define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i8:
@@ -6315,6 +6465,7 @@ define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) {
 ; CHECK-GI-FP16:       // %bb.0: // %entry
 ; CHECK-GI-FP16-NEXT:    fcvtzu v0.4h, v0.4h
 ; CHECK-GI-FP16-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = fptoui <2 x half> %a to <2 x i8>
@@ -6799,7 +6950,9 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixhfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -6817,6 +6970,7 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v2f16_v2i128:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
@@ -6828,6 +6982,7 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i128:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
 ; CHECK-GI-FP16-NEXT:    fcvtzs x2, h1
@@ -6849,7 +7004,9 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w19, -8
 ; CHECK-SD-NEXT:    .cfi_offset w20, -16
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunshfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -6867,10 +7024,11 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v2f16_v2i128:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT:    mov x1, xzr
 ; CHECK-GI-NOFP16-NEXT:    mov x3, xzr
+; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu x0, s0
 ; CHECK-GI-NOFP16-NEXT:    fcvtzu x2, s1
@@ -6878,6 +7036,7 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i128:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvtzu x0, h0
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -6902,7 +7061,9 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w21, -24
 ; CHECK-SD-NEXT:    .cfi_offset w22, -32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixhfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -6928,6 +7089,7 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptos_v3f16_v3i128:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
@@ -6943,6 +7105,7 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptos_v3f16_v3i128:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    fcvtzs x0, h0
@@ -6970,7 +7133,9 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) {
 ; CHECK-SD-NEXT:    .cfi_offset w21, -24
 ; CHECK-SD-NEXT:    .cfi_offset w22, -32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -48
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunshfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x19, x0
@@ -6996,6 +7161,7 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: fptou_v3f16_v3i128:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-NOFP16-NEXT:    mov x1, xzr
@@ -7011,6 +7177,7 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: fptou_v3f16_v3i128:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -7135,6 +7302,7 @@ define <3 x i64> @fptos_v3f128_v3i64(<3 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr d8, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    fmov d1, x0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7191,6 +7359,7 @@ define <3 x i64> @fptou_v3f128_v3i64(<3 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr d8, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    fmov d1, x0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7239,6 +7408,7 @@ define <2 x i32> @fptos_v2f128_v2i32(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7257,6 +7427,7 @@ define <2 x i32> @fptos_v2f128_v2i32(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7280,6 +7451,7 @@ define <2 x i32> @fptou_v2f128_v2i32(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7298,6 +7470,7 @@ define <2 x i32> @fptou_v2f128_v2i32(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7427,6 +7600,7 @@ define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7445,6 +7619,7 @@ define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7468,6 +7643,7 @@ define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7486,6 +7662,7 @@ define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7542,6 +7719,7 @@ define <3 x i16> @fptos_v3f128_v3i16(<3 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.h[1], w20
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.h[2], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #64
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7598,6 +7776,7 @@ define <3 x i16> @fptou_v3f128_v3i16(<3 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.h[1], w20
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.h[2], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #64
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7621,6 +7800,7 @@ define <2 x i8> @fptos_v2f128_v2i8(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7639,6 +7819,7 @@ define <2 x i8> @fptos_v2f128_v2i8(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -7662,6 +7843,7 @@ define <2 x i8> @fptou_v2f128_v2i8(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], w0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -7680,6 +7862,7 @@ define <2 x i8> @fptou_v2f128_v2i8(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v0.s[0], w19
 ; CHECK-GI-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #32
 ; CHECK-GI-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index a79c2df4e1970a..9ef6d61c350ecf 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -32,6 +32,7 @@ define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fcvtzs w8, s0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
@@ -67,6 +68,11 @@ define <4 x i32> @test_signed_v4f32_v4i32(<4 x float> %f) {
 define <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
 ; CHECK-SD-LABEL: test_signed_v5f32_v5i32:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    fcvtzs v4.4s, v4.4s
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -81,6 +87,11 @@ define <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v5f32_v5i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    fcvtzs v1.4s, v4.4s
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -102,6 +113,12 @@ define <5 x i32> @test_signed_v5f32_v5i32(<5 x float> %f) {
 define <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
 ; CHECK-SD-LABEL: test_signed_v6f32_v6i32:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -118,6 +135,12 @@ define <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v6f32_v6i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -142,6 +165,13 @@ define <6 x i32> @test_signed_v6f32_v6i32(<6 x float> %f) {
 define <7 x i32> @test_signed_v7f32_v7i32(<7 x float> %f) {
 ; CHECK-SD-LABEL: test_signed_v7f32_v7i32:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -160,6 +190,13 @@ define <7 x i32> @test_signed_v7f32_v7i32(<7 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v7f32_v7i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -216,6 +253,7 @@ define <1 x i32> @test_signed_v1f64_v1i32(<1 x double> %f) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fcvtzs w8, d0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f64.v1i32(<1 x double> %f)
     ret <1 x i32> %x
@@ -229,6 +267,7 @@ define <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzs w9, d1
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i32:
@@ -263,9 +302,12 @@ define <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v3f64_v3i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    adrp x8, .LCPI10_1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
-; CHECK-GI-NEXT:    adrp x8, .LCPI10_1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI10_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI10_0
 ; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
@@ -333,9 +375,14 @@ define <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v5f64_v5i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    adrp x8, .LCPI12_1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
-; CHECK-GI-NEXT:    adrp x8, .LCPI12_1
 ; CHECK-GI-NEXT:    fcvtzs v3.2d, v4.2d
 ; CHECK-GI-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-GI-NEXT:    fcvtzs v1.2d, v2.2d
@@ -358,9 +405,14 @@ define <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) {
 ; CHECK-GI-NEXT:    mov d4, v1.d[1]
 ; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    fmov x2, d1
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
 ; CHECK-GI-NEXT:    fmov x4, d2
 ; CHECK-GI-NEXT:    fmov x1, d3
 ; CHECK-GI-NEXT:    fmov x3, d4
+; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
 ; CHECK-GI-NEXT:    ret
     %x = call <5 x i32> @llvm.fptosi.sat.v5f64.v5i32(<5 x double> %f)
     ret <5 x i32> %x
@@ -379,9 +431,15 @@ define <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_signed_v6f64_v6i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-GI-NEXT:    adrp x8, .LCPI13_1
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
-; CHECK-GI-NEXT:    adrp x8, .LCPI13_1
 ; CHECK-GI-NEXT:    mov v4.d[1], v5.d[0]
 ; CHECK-GI-NEXT:    ldr q3, [x8, :lo12:.LCPI13_1]
 ; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
@@ -403,13 +461,19 @@ define <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) {
 ; CHECK-GI-NEXT:    bif v2.16b, v3.16b, v6.16b
 ; CHECK-GI-NEXT:    mov d3, v0.d[1]
 ; CHECK-GI-NEXT:    mov d4, v1.d[1]
-; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    mov d5, v2.d[1]
+; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    fmov x2, d1
 ; CHECK-GI-NEXT:    fmov x4, d2
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
 ; CHECK-GI-NEXT:    fmov x1, d3
 ; CHECK-GI-NEXT:    fmov x3, d4
 ; CHECK-GI-NEXT:    fmov x5, d5
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
+; CHECK-GI-NEXT:    // kill: def $w5 killed $w5 killed $x5
 ; CHECK-GI-NEXT:    ret
     %x = call <6 x i32> @llvm.fptosi.sat.v6f64.v6i32(<6 x double> %f)
     ret <6 x i32> %x
@@ -501,6 +565,7 @@ define <1 x i32> @test_signed_v1f128_v1i32(<1 x fp128> %f) {
 ; CHECK-GI-NEXT:    csel w8, wzr, w19, ne
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #48
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f128.v1i32(<1 x fp128> %f)
@@ -569,6 +634,7 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w22
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #112
 ; CHECK-SD-NEXT:    ret
 ;
@@ -648,6 +714,7 @@ define <2 x i32> @test_signed_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #112
 ; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptosi.sat.v2f128.v2i32(<2 x fp128> %f)
@@ -1123,12 +1190,14 @@ define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) {
 ; CHECK-GI-CVT-NEXT:    fcvt s0, h0
 ; CHECK-GI-CVT-NEXT:    fcvtzs w8, s0
 ; CHECK-GI-CVT-NEXT:    mov v0.s[0], w8
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-CVT-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: test_signed_v1f16_v1i32:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    fcvtzs w8, h0
 ; CHECK-GI-FP16-NEXT:    mov v0.s[0], w8
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
     %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
@@ -1139,6 +1208,7 @@ define <2 x i32> @test_signed_v2f16_v2i32(<2 x half> %f) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f16_v2i32:
@@ -1440,6 +1510,7 @@ define <2 x i32> @test_signed_v2f32_v2i32_duplicate(<2 x float> %f) {
 define <2 x i50> @test_signed_v2f32_v2i50(<2 x float> %f) {
 ; CHECK-SD-LABEL: test_signed_v2f32_v2i50:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
 ; CHECK-SD-NEXT:    fcvtzs x10, s0
@@ -1502,7 +1573,9 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -48
 ; CHECK-SD-NEXT:    .cfi_offset b9, -56
 ; CHECK-SD-NEXT:    .cfi_offset b10, -64
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #241, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1558,8 +1631,10 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -48
 ; CHECK-GI-NEXT:    .cfi_offset b9, -56
 ; CHECK-GI-NEXT:    .cfi_offset b10, -64
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v9.2s, #241, lsl #24
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1618,7 +1693,9 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -48
 ; CHECK-SD-NEXT:    .cfi_offset b9, -56
 ; CHECK-SD-NEXT:    .cfi_offset b10, -64
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1674,8 +1751,10 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -48
 ; CHECK-GI-NEXT:    .cfi_offset b9, -56
 ; CHECK-GI-NEXT:    .cfi_offset b10, -64
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v9.2s, #255, lsl #24
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1902,6 +1981,7 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b9, -88
 ; CHECK-SD-NEXT:    .cfi_offset b10, -96
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #241, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -1931,6 +2011,7 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-SD-NEXT:    fcmp s8, s8
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    csel x21, xzr, x8, vs
 ; CHECK-SD-NEXT:    csel x22, xzr, x9, vs
 ; CHECK-SD-NEXT:    bl __fixsfti
@@ -2002,6 +2083,7 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[1]
 ; CHECK-GI-NEXT:    mov s10, v0.s[2]
 ; CHECK-GI-NEXT:    mov s8, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v11.2s, #241, lsl #24
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2096,6 +2178,7 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b9, -88
 ; CHECK-SD-NEXT:    .cfi_offset b10, -96
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixsfti
 ; CHECK-SD-NEXT:    movi v9.2s, #255, lsl #24
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2125,6 +2208,7 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
 ; CHECK-SD-NEXT:    fcmp s8, s8
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    csel x21, xzr, x8, vs
 ; CHECK-SD-NEXT:    csel x22, xzr, x9, vs
 ; CHECK-SD-NEXT:    bl __fixsfti
@@ -2196,6 +2280,7 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[1]
 ; CHECK-GI-NEXT:    mov s10, v0.s[2]
 ; CHECK-GI-NEXT:    mov s8, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixsfti
 ; CHECK-GI-NEXT:    movi v11.2s, #255, lsl #24
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2291,6 +2376,7 @@ define <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csinv w9, w9, wzr, ge
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    mov v0.s[1], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i1:
@@ -2325,6 +2411,7 @@ define <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w8, w8, w11, gt
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i8:
@@ -2362,6 +2449,7 @@ define <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w8, w8, w11, gt
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i13:
@@ -2399,6 +2487,7 @@ define <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w8, w8, w11, gt
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i16:
@@ -2436,6 +2525,7 @@ define <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w8, w8, w11, gt
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i19:
@@ -2463,6 +2553,7 @@ define <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzs w9, d1
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_signed_v2f64_v2i32_duplicate:
@@ -2546,6 +2637,7 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b9, -56
 ; CHECK-SD-NEXT:    .cfi_offset b10, -64
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixdfti
 ; CHECK-SD-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2604,6 +2696,7 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b10, -64
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixdfti
 ; CHECK-GI-NEXT:    mov x8, #-4170333254945079296 // =0xc620000000000000
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2664,6 +2757,7 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b9, -56
 ; CHECK-SD-NEXT:    .cfi_offset b10, -64
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixdfti
 ; CHECK-SD-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2722,6 +2816,7 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b10, -64
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixdfti
 ; CHECK-GI-NEXT:    mov x8, #-4044232465378705408 // =0xc7e0000000000000
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -2968,6 +3063,7 @@ define <4 x i32> @test_signed_v4f16_v4i32_duplicate(<4 x half> %f) {
 define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
 ; CHECK-SD-CVT-LABEL: test_signed_v4f16_v4i50:
 ; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-CVT-NEXT:    fcvt s2, h0
 ; CHECK-SD-CVT-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
@@ -3001,6 +3097,7 @@ define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-SD-FP16-LABEL: test_signed_v4f16_v4i50:
 ; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    fcvtzs x9, h0
 ; CHECK-SD-FP16-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
@@ -3030,6 +3127,7 @@ define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i50:
 ; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    fcvt s2, h0
 ; CHECK-GI-CVT-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
@@ -3063,6 +3161,7 @@ define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i50:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    fcvtzs x9, h0
 ; CHECK-GI-FP16-NEXT:    mov x8, #562949953421311 // =0x1ffffffffffff
@@ -3096,6 +3195,7 @@ define <4 x i50> @test_signed_v4f16_v4i50(<4 x half> %f) {
 define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) {
 ; CHECK-SD-CVT-LABEL: test_signed_v4f16_v4i64:
 ; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-CVT-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-CVT-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-CVT-NEXT:    mov h3, v0.h[3]
@@ -3115,6 +3215,7 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) {
 ;
 ; CHECK-SD-FP16-LABEL: test_signed_v4f16_v4i64:
 ; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
@@ -3139,6 +3240,7 @@ define <4 x i64> @test_signed_v4f16_v4i64(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i64:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
@@ -3179,6 +3281,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -80
 ; CHECK-SD-NEXT:    .cfi_offset b9, -88
 ; CHECK-SD-NEXT:    .cfi_offset b10, -96
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fcvt s8, h0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fmov s0, s8
@@ -3257,6 +3360,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i100:
 ; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-CVT-NEXT:    mov x1, xzr
@@ -3276,6 +3380,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i100:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -3316,6 +3421,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -80
 ; CHECK-SD-NEXT:    .cfi_offset b9, -88
 ; CHECK-SD-NEXT:    .cfi_offset b10, -96
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fcvt s8, h0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fmov s0, s8
@@ -3394,6 +3500,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_signed_v4f16_v4i128:
 ; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-CVT-NEXT:    mov x1, xzr
@@ -3413,6 +3520,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_signed_v4f16_v4i128:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -5007,111 +5115,111 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    mov v0.s[1], w11
 ; CHECK-SD-NEXT:    csel w12, w13, w8, lt
+; CHECK-SD-NEXT:    mov v0.s[1], w11
 ; CHECK-SD-NEXT:    fcvtzs w11, d1
 ; CHECK-SD-NEXT:    cmn w12, #128
 ; CHECK-SD-NEXT:    csel w12, w12, w9, gt
 ; CHECK-SD-NEXT:    fmov s1, w12
 ; CHECK-SD-NEXT:    fcvtzs w12, d2
 ; CHECK-SD-NEXT:    mov d2, v3.d[1]
-; CHECK-SD-NEXT:    mov w13, v0.s[1]
 ; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    mov w13, v0.s[1]
 ; CHECK-SD-NEXT:    mov v1.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    fcvtzs w11, d2
-; CHECK-SD-NEXT:    mov d2, v4.d[1]
-; CHECK-SD-NEXT:    mov v0.b[1], w13
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    mov v0.b[1], w13
 ; CHECK-SD-NEXT:    csel w12, w12, w8, lt
 ; CHECK-SD-NEXT:    cmn w12, #128
 ; CHECK-SD-NEXT:    mov w13, v1.s[1]
 ; CHECK-SD-NEXT:    csel w12, w12, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
-; CHECK-SD-NEXT:    fmov s1, w12
+; CHECK-SD-NEXT:    fmov s2, w12
 ; CHECK-SD-NEXT:    fcvtzs w12, d3
-; CHECK-SD-NEXT:    mov v1.s[1], w10
+; CHECK-SD-NEXT:    mov d3, v4.d[1]
+; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
+; CHECK-SD-NEXT:    mov v2.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w11, w8, lt
-; CHECK-SD-NEXT:    fcvtzs w11, d2
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    mov v0.b[3], w13
-; CHECK-SD-NEXT:    mov d2, v5.d[1]
+; CHECK-SD-NEXT:    fcvtzs w11, d3
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    mov v0.b[3], w13
 ; CHECK-SD-NEXT:    csel w12, w12, w8, lt
 ; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v1.s[1]
+; CHECK-SD-NEXT:    mov w13, v2.s[1]
 ; CHECK-SD-NEXT:    csel w12, w12, w9, gt
-; CHECK-SD-NEXT:    mov v0.b[4], v1.b[0]
 ; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fmov s1, w12
+; CHECK-SD-NEXT:    fmov s3, w12
 ; CHECK-SD-NEXT:    fcvtzs w12, d4
-; CHECK-SD-NEXT:    mov v1.s[1], w10
+; CHECK-SD-NEXT:    mov v0.b[4], v2.b[0]
+; CHECK-SD-NEXT:    mov d4, v5.d[1]
+; CHECK-SD-NEXT:    mov v3.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w11, w8, lt
-; CHECK-SD-NEXT:    mov v0.b[5], w13
 ; CHECK-SD-NEXT:    cmn w10, #128
-; CHECK-SD-NEXT:    fcvtzs w11, d2
-; CHECK-SD-NEXT:    mov d2, v6.d[1]
+; CHECK-SD-NEXT:    mov v0.b[5], w13
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    fcvtzs w11, d4
 ; CHECK-SD-NEXT:    csel w12, w12, w8, lt
 ; CHECK-SD-NEXT:    cmn w12, #128
-; CHECK-SD-NEXT:    mov w13, v1.s[1]
-; CHECK-SD-NEXT:    mov v0.b[6], v1.b[0]
+; CHECK-SD-NEXT:    mov w13, v3.s[1]
 ; CHECK-SD-NEXT:    csel w12, w12, w9, gt
-; CHECK-SD-NEXT:    cmp w11, #127
-; CHECK-SD-NEXT:    fmov s1, w12
+; CHECK-SD-NEXT:    mov v0.b[6], v3.b[0]
+; CHECK-SD-NEXT:    fmov s4, w12
 ; CHECK-SD-NEXT:    fcvtzs w12, d5
-; CHECK-SD-NEXT:    mov v0.b[7], w13
-; CHECK-SD-NEXT:    fcvtzs w13, d2
-; CHECK-SD-NEXT:    mov d2, v7.d[1]
-; CHECK-SD-NEXT:    mov v1.s[1], w10
+; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    mov d5, v6.d[1]
+; CHECK-SD-NEXT:    mov v4.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w11, w8, lt
+; CHECK-SD-NEXT:    mov v0.b[7], w13
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w12, #127
+; CHECK-SD-NEXT:    fcvtzs w13, d5
 ; CHECK-SD-NEXT:    csel w11, w12, w8, lt
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    mov w12, v1.s[1]
-; CHECK-SD-NEXT:    mov v0.b[8], v1.b[0]
+; CHECK-SD-NEXT:    mov w12, v4.s[1]
+; CHECK-SD-NEXT:    mov v0.b[8], v4.b[0]
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    fmov s1, w11
+; CHECK-SD-NEXT:    fmov s5, w11
 ; CHECK-SD-NEXT:    fcvtzs w11, d6
+; CHECK-SD-NEXT:    cmp w13, #127
+; CHECK-SD-NEXT:    mov d6, v7.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[9], w12
-; CHECK-SD-NEXT:    mov v1.s[1], w10
+; CHECK-SD-NEXT:    mov v5.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w13, w8, lt
-; CHECK-SD-NEXT:    fcvtzs w13, d2
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
+; CHECK-SD-NEXT:    fcvtzs w13, d6
 ; CHECK-SD-NEXT:    csel w11, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w11, #128
-; CHECK-SD-NEXT:    mov v0.b[10], v1.b[0]
-; CHECK-SD-NEXT:    mov w12, v1.s[1]
+; CHECK-SD-NEXT:    mov v0.b[10], v5.b[0]
+; CHECK-SD-NEXT:    mov w12, v5.s[1]
 ; CHECK-SD-NEXT:    csel w11, w11, w9, gt
-; CHECK-SD-NEXT:    cmp w13, #127
-; CHECK-SD-NEXT:    fmov s1, w11
+; CHECK-SD-NEXT:    fmov s6, w11
 ; CHECK-SD-NEXT:    fcvtzs w11, d7
+; CHECK-SD-NEXT:    cmp w13, #127
 ; CHECK-SD-NEXT:    mov v0.b[11], w12
-; CHECK-SD-NEXT:    mov v1.s[1], w10
+; CHECK-SD-NEXT:    mov v6.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w13, w8, lt
 ; CHECK-SD-NEXT:    cmn w10, #128
 ; CHECK-SD-NEXT:    csel w10, w10, w9, gt
 ; CHECK-SD-NEXT:    cmp w11, #127
 ; CHECK-SD-NEXT:    csel w8, w11, w8, lt
 ; CHECK-SD-NEXT:    cmn w8, #128
-; CHECK-SD-NEXT:    mov v0.b[12], v1.b[0]
-; CHECK-SD-NEXT:    mov w11, v1.s[1]
+; CHECK-SD-NEXT:    mov v0.b[12], v6.b[0]
+; CHECK-SD-NEXT:    mov w11, v6.s[1]
 ; CHECK-SD-NEXT:    csel w8, w8, w9, gt
-; CHECK-SD-NEXT:    fmov s1, w8
+; CHECK-SD-NEXT:    fmov s7, w8
 ; CHECK-SD-NEXT:    mov v0.b[13], w11
-; CHECK-SD-NEXT:    mov v1.s[1], w10
-; CHECK-SD-NEXT:    mov v0.b[14], v1.b[0]
-; CHECK-SD-NEXT:    mov w8, v1.s[1]
+; CHECK-SD-NEXT:    mov v7.s[1], w10
+; CHECK-SD-NEXT:    mov v0.b[14], v7.b[0]
+; CHECK-SD-NEXT:    mov w8, v7.s[1]
 ; CHECK-SD-NEXT:    mov v0.b[15], w8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -5303,32 +5411,26 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
 ; CHECK-SD-NEXT:    cmp w13, w9
 ; CHECK-SD-NEXT:    csel w11, w13, w9, lt
 ; CHECK-SD-NEXT:    fcvtzs w13, d3
-; CHECK-SD-NEXT:    fmov s3, w12
 ; CHECK-SD-NEXT:    cmn w11, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w11, w11, w8, gt
 ; CHECK-SD-NEXT:    cmp w14, w9
 ; CHECK-SD-NEXT:    csel w14, w14, w9, lt
-; CHECK-SD-NEXT:    mov v3.s[1], w10
 ; CHECK-SD-NEXT:    cmn w14, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w14, w14, w8, gt
 ; CHECK-SD-NEXT:    cmp w13, w9
 ; CHECK-SD-NEXT:    csel w13, w13, w9, lt
-; CHECK-SD-NEXT:    fmov s2, w14
 ; CHECK-SD-NEXT:    cmn w13, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w13, w13, w8, gt
 ; CHECK-SD-NEXT:    cmp w15, w9
 ; CHECK-SD-NEXT:    csel w15, w15, w9, lt
-; CHECK-SD-NEXT:    mov v2.s[1], w11
 ; CHECK-SD-NEXT:    cmn w15, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w16, w15, w8, gt
 ; CHECK-SD-NEXT:    cmp w17, w9
 ; CHECK-SD-NEXT:    csel w15, w17, w9, lt
-; CHECK-SD-NEXT:    fmov s1, w16
 ; CHECK-SD-NEXT:    cmn w15, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w15, w15, w8, gt
 ; CHECK-SD-NEXT:    cmp w18, w9
 ; CHECK-SD-NEXT:    csel w17, w18, w9, lt
-; CHECK-SD-NEXT:    mov v1.s[1], w13
 ; CHECK-SD-NEXT:    cmn w17, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w17, w17, w8, gt
 ; CHECK-SD-NEXT:    cmp w0, w9
@@ -5351,32 +5453,38 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) {
 ; CHECK-SD-NEXT:    cmp w2, w9
 ; CHECK-SD-NEXT:    fcvtzs w5, d0
 ; CHECK-SD-NEXT:    csel w2, w2, w9, lt
-; CHECK-SD-NEXT:    fmov s0, w17
+; CHECK-SD-NEXT:    fmov s3, w12
 ; CHECK-SD-NEXT:    mov v7.s[1], w18
 ; CHECK-SD-NEXT:    cmn w2, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w2, w2, w8, gt
 ; CHECK-SD-NEXT:    cmp w3, w9
 ; CHECK-SD-NEXT:    csel w3, w3, w9, lt
+; CHECK-SD-NEXT:    mov v3.s[1], w10
 ; CHECK-SD-NEXT:    fmov s6, w2
-; CHECK-SD-NEXT:    mov v0.s[1], w15
 ; CHECK-SD-NEXT:    cmn w3, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    fmov s2, w14
 ; CHECK-SD-NEXT:    csel w3, w3, w8, gt
 ; CHECK-SD-NEXT:    cmp w4, w9
 ; CHECK-SD-NEXT:    csel w4, w4, w9, lt
 ; CHECK-SD-NEXT:    mov v6.s[1], w0
 ; CHECK-SD-NEXT:    cmn w4, #8, lsl #12 // =32768
+; CHECK-SD-NEXT:    mov v2.s[1], w11
 ; CHECK-SD-NEXT:    csel w12, w4, w8, gt
 ; CHECK-SD-NEXT:    cmp w5, w9
+; CHECK-SD-NEXT:    fmov s1, w16
 ; CHECK-SD-NEXT:    csel w10, w5, w9, lt
 ; CHECK-SD-NEXT:    fmov s5, w12
 ; CHECK-SD-NEXT:    cmn w10, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w10, w10, w8, gt
 ; CHECK-SD-NEXT:    cmp w6, w9
+; CHECK-SD-NEXT:    mov v1.s[1], w13
 ; CHECK-SD-NEXT:    csel w9, w6, w9, lt
 ; CHECK-SD-NEXT:    mov v5.s[1], w3
+; CHECK-SD-NEXT:    fmov s0, w17
 ; CHECK-SD-NEXT:    cmn w9, #8, lsl #12 // =32768
 ; CHECK-SD-NEXT:    csel w8, w9, w8, gt
 ; CHECK-SD-NEXT:    fmov s4, w8
+; CHECK-SD-NEXT:    mov v0.s[1], w15
 ; CHECK-SD-NEXT:    adrp x8, .LCPI85_0
 ; CHECK-SD-NEXT:    ldr q16, [x8, :lo12:.LCPI85_0]
 ; CHECK-SD-NEXT:    mov v4.s[1], w10

diff  --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index 45133667df6ddc..e1670ad2dc053b 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -32,6 +32,7 @@ define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fcvtzu w8, s0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f)
     ret <1 x i32> %x
@@ -67,6 +68,11 @@ define <4 x i32> @test_unsigned_v4f32_v4i32(<4 x float> %f) {
 define <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v5f32_v5i32:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    fcvtzu v4.4s, v4.4s
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -81,6 +87,11 @@ define <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v5f32_v5i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    fcvtzu v1.4s, v4.4s
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -102,6 +113,12 @@ define <5 x i32> @test_unsigned_v5f32_v5i32(<5 x float> %f) {
 define <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v6f32_v6i32:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -118,6 +135,12 @@ define <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v6f32_v6i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -142,6 +165,13 @@ define <6 x i32> @test_unsigned_v6f32_v6i32(<6 x float> %f) {
 define <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v7f32_v7i32:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-SD-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-SD-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-SD-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-SD-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-SD-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-SD-NEXT:    mov v0.s[2], v2.s[0]
@@ -160,6 +190,13 @@ define <7 x i32> @test_unsigned_v7f32_v7i32(<7 x float> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v7f32_v7i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-GI-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-GI-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-GI-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-GI-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-GI-NEXT:    // kill: def $s6 killed $s6 def $q6
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-GI-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[2], v2.s[0]
@@ -216,6 +253,7 @@ define <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    fcvtzu w8, d0
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f64.v1i32(<1 x double> %f)
     ret <1 x i32> %x
@@ -229,6 +267,7 @@ define <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzu w9, d1
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i32:
@@ -258,6 +297,9 @@ define <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v3f64_v3i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtzu v1.2d, v2.2d
 ; CHECK-GI-NEXT:    movi v2.2d, #0x000000ffffffff
@@ -314,6 +356,11 @@ define <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v5f64_v5i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    movi v1.2d, #0x000000ffffffff
@@ -330,9 +377,14 @@ define <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) {
 ; CHECK-GI-NEXT:    mov d4, v2.d[1]
 ; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    fmov x2, d2
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
 ; CHECK-GI-NEXT:    fmov x4, d1
 ; CHECK-GI-NEXT:    fmov x1, d3
 ; CHECK-GI-NEXT:    fmov x3, d4
+; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
 ; CHECK-GI-NEXT:    ret
     %x = call <5 x i32> @llvm.fptoui.sat.v5f64.v5i32(<5 x double> %f)
     ret <5 x i32> %x
@@ -351,6 +403,12 @@ define <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
 ;
 ; CHECK-GI-LABEL: test_unsigned_v6f64_v6i32:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d5 killed $d5 def $q5
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v4.d[1], v5.d[0]
@@ -370,9 +428,15 @@ define <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) {
 ; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    fmov x2, d2
 ; CHECK-GI-NEXT:    fmov x4, d1
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
+; CHECK-GI-NEXT:    // kill: def $w4 killed $w4 killed $x4
 ; CHECK-GI-NEXT:    fmov x1, d3
 ; CHECK-GI-NEXT:    fmov x3, d4
 ; CHECK-GI-NEXT:    fmov x5, d5
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 killed $x3
+; CHECK-GI-NEXT:    // kill: def $w5 killed $w5 killed $x5
 ; CHECK-GI-NEXT:    ret
     %x = call <6 x i32> @llvm.fptoui.sat.v6f64.v6i32(<6 x double> %f)
     ret <6 x i32> %x
@@ -450,6 +514,7 @@ define <1 x i32> @test_unsigned_v1f128_v1i32(<1 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[0], w0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #48
 ; CHECK-GI-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f128.v1i32(<1 x fp128> %f)
@@ -502,6 +567,7 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w20
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #96
 ; CHECK-SD-NEXT:    ret
 ;
@@ -567,6 +633,7 @@ define <2 x i32> @test_unsigned_v2f128_v2i32(<2 x fp128> %f) {
 ; CHECK-GI-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.s[1], w0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #96
 ; CHECK-GI-NEXT:    ret
     %x = call <2 x i32> @llvm.fptoui.sat.v2f128.v2i32(<2 x fp128> %f)
@@ -954,12 +1021,14 @@ define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) {
 ; CHECK-GI-CVT-NEXT:    fcvt s0, h0
 ; CHECK-GI-CVT-NEXT:    fcvtzu w8, s0
 ; CHECK-GI-CVT-NEXT:    mov v0.s[0], w8
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-CVT-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: test_unsigned_v1f16_v1i32:
 ; CHECK-GI-FP16:       // %bb.0:
 ; CHECK-GI-FP16-NEXT:    fcvtzu w8, h0
 ; CHECK-GI-FP16-NEXT:    mov v0.s[0], w8
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
     %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f)
     ret <1 x i32> %x
@@ -970,6 +1039,7 @@ define <2 x i32> @test_unsigned_v2f16_v2i32(<2 x half> %f) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-SD-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f16_v2i32:
@@ -1216,6 +1286,7 @@ define <2 x i32> @test_unsigned_v2f32_v2i32_duplicate(<2 x float> %f) {
 define <2 x i50> @test_unsigned_v2f32_v2i50(<2 x float> %f) {
 ; CHECK-SD-LABEL: test_unsigned_v2f32_v2i50:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov s1, v0.s[1]
 ; CHECK-SD-NEXT:    fcvtzu x9, s0
 ; CHECK-SD-NEXT:    mov x10, #1125899906842623 // =0x3ffffffffffff
@@ -1265,7 +1336,9 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    .cfi_offset b8, -40
 ; CHECK-SD-NEXT:    .cfi_offset b9, -48
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
@@ -1307,8 +1380,10 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -32
 ; CHECK-GI-NEXT:    .cfi_offset b8, -40
 ; CHECK-GI-NEXT:    .cfi_offset b9, -48
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov w8, #1904214015 // =0x717fffff
@@ -1352,7 +1427,9 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -32
 ; CHECK-SD-NEXT:    .cfi_offset b8, -40
 ; CHECK-SD-NEXT:    .cfi_offset b9, -48
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
@@ -1392,8 +1469,10 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -32
 ; CHECK-GI-NEXT:    .cfi_offset b8, -40
 ; CHECK-GI-NEXT:    .cfi_offset b9, -48
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov w8, #2139095039 // =0x7f7fffff
@@ -1576,6 +1655,7 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -72
 ; CHECK-SD-NEXT:    .cfi_offset b9, -80
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
@@ -1599,6 +1679,7 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    csel x21, x25, x9, gt
 ; CHECK-SD-NEXT:    csinv x22, x8, xzr, le
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s8, v0.s[1]
@@ -1656,6 +1737,7 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov w8, #1904214015 // =0x717fffff
@@ -1729,6 +1811,7 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -72
 ; CHECK-SD-NEXT:    .cfi_offset b9, -80
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov w8, #2139095039 // =0x7f7fffff
@@ -1751,6 +1834,7 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    csinv x21, x9, xzr, le
 ; CHECK-SD-NEXT:    csinv x22, x8, xzr, le
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunssfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s8, v0.s[1]
@@ -1807,6 +1891,7 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunssfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov w8, #2139095039 // =0x7f7fffff
@@ -1885,6 +1970,7 @@ define <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csinc w9, w9, wzr, lo
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    mov v0.s[1], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i1:
@@ -1913,6 +1999,7 @@ define <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w8, w10, w8, lo
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i8:
@@ -1940,6 +2027,7 @@ define <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w9, w9, w10, lo
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    mov v0.s[1], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i13:
@@ -1968,6 +2056,7 @@ define <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w9, w9, w10, lo
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    mov v0.s[1], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i16:
@@ -1995,6 +2084,7 @@ define <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) {
 ; CHECK-SD-NEXT:    csel w9, w9, w10, lo
 ; CHECK-SD-NEXT:    fmov s0, w9
 ; CHECK-SD-NEXT:    mov v0.s[1], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i19:
@@ -2018,6 +2108,7 @@ define <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzu w9, d1
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v2f64_v2i32_duplicate:
@@ -2083,6 +2174,7 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -40
 ; CHECK-SD-NEXT:    .cfi_offset b9, -48
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunsdfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
@@ -2126,6 +2218,7 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -48
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunsdfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov x8, #5057542381537067007 // =0x462fffffffffffff
@@ -2170,6 +2263,7 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset b8, -40
 ; CHECK-SD-NEXT:    .cfi_offset b9, -48
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl __fixunsdfti
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
@@ -2211,6 +2305,7 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -48
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl __fixunsdfti
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov x8, #5183643171103440895 // =0x47efffffffffffff
@@ -2417,6 +2512,7 @@ define <4 x i32> @test_unsigned_v4f16_v4i32_duplicate(<4 x half> %f) {
 define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
 ; CHECK-SD-CVT-LABEL: test_unsigned_v4f16_v4i50:
 ; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-CVT-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
@@ -2441,6 +2537,7 @@ define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-SD-FP16-LABEL: test_unsigned_v4f16_v4i50:
 ; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
@@ -2461,6 +2558,7 @@ define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i50:
 ; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-CVT-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
@@ -2485,6 +2583,7 @@ define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i50:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x8, #1125899906842623 // =0x3ffffffffffff
@@ -2509,6 +2608,7 @@ define <4 x i50> @test_unsigned_v4f16_v4i50(<4 x half> %f) {
 define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) {
 ; CHECK-SD-CVT-LABEL: test_unsigned_v4f16_v4i64:
 ; CHECK-SD-CVT:       // %bb.0:
+; CHECK-SD-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-CVT-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-CVT-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-CVT-NEXT:    mov h3, v0.h[3]
@@ -2528,6 +2628,7 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) {
 ;
 ; CHECK-SD-FP16-LABEL: test_unsigned_v4f16_v4i64:
 ; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    mov h2, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    mov h3, v0.h[3]
@@ -2552,6 +2653,7 @@ define <4 x i64> @test_unsigned_v4f16_v4i64(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i64:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]
@@ -2588,6 +2690,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -64
 ; CHECK-SD-NEXT:    .cfi_offset b8, -72
 ; CHECK-SD-NEXT:    .cfi_offset b9, -80
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s8, h1
@@ -2650,6 +2753,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i100:
 ; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-CVT-NEXT:    mov x1, xzr
@@ -2669,6 +2773,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i100:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -2704,6 +2809,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ; CHECK-SD-NEXT:    .cfi_offset w30, -64
 ; CHECK-SD-NEXT:    .cfi_offset b8, -72
 ; CHECK-SD-NEXT:    .cfi_offset b9, -80
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fcvt s8, h0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fmov s0, s8
@@ -2765,6 +2871,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ;
 ; CHECK-GI-CVT-LABEL: test_unsigned_v4f16_v4i128:
 ; CHECK-GI-CVT:       // %bb.0:
+; CHECK-GI-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-CVT-NEXT:    mov x1, xzr
@@ -2784,6 +2891,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
 ;
 ; CHECK-GI-FP16-LABEL: test_unsigned_v4f16_v4i128:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov x1, xzr
@@ -4053,8 +4161,8 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzu w12, d2
 ; CHECK-SD-NEXT:    fcvtzu w14, d1
 ; CHECK-SD-NEXT:    fcvtzu w8, d4
-; CHECK-SD-NEXT:    fcvtzu w10, d5
 ; CHECK-SD-NEXT:    mov d4, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w10, d5
 ; CHECK-SD-NEXT:    fcvtzu w13, d3
 ; CHECK-SD-NEXT:    cmp w8, #255
 ; CHECK-SD-NEXT:    fcvtzu w15, d4
@@ -4062,29 +4170,29 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) {
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w11, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    fmov s3, w9
+; CHECK-SD-NEXT:    fmov s4, w9
 ; CHECK-SD-NEXT:    csel w9, w10, w11, lo
 ; CHECK-SD-NEXT:    cmp w12, #255
 ; CHECK-SD-NEXT:    fcvtzu w10, d0
-; CHECK-SD-NEXT:    mov v3.s[1], w8
+; CHECK-SD-NEXT:    mov v4.s[1], w8
 ; CHECK-SD-NEXT:    csel w8, w12, w11, lo
 ; CHECK-SD-NEXT:    cmp w13, #255
-; CHECK-SD-NEXT:    fmov s2, w8
+; CHECK-SD-NEXT:    fmov s3, w8
 ; CHECK-SD-NEXT:    csel w8, w13, w11, lo
 ; CHECK-SD-NEXT:    cmp w14, #255
-; CHECK-SD-NEXT:    mov v2.s[1], w9
+; CHECK-SD-NEXT:    mov v3.s[1], w9
 ; CHECK-SD-NEXT:    csel w9, w14, w11, lo
 ; CHECK-SD-NEXT:    cmp w15, #255
-; CHECK-SD-NEXT:    fmov s1, w9
+; CHECK-SD-NEXT:    fmov s2, w9
 ; CHECK-SD-NEXT:    csel w9, w15, w11, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov v1.s[1], w8
+; CHECK-SD-NEXT:    mov v2.s[1], w8
 ; CHECK-SD-NEXT:    csel w8, w10, w11, lo
-; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    fmov s1, w8
 ; CHECK-SD-NEXT:    adrp x8, .LCPI82_0
-; CHECK-SD-NEXT:    ldr d4, [x8, :lo12:.LCPI82_0]
-; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
+; CHECK-SD-NEXT:    ldr d0, [x8, :lo12:.LCPI82_0]
+; CHECK-SD-NEXT:    mov v1.s[1], w9
+; CHECK-SD-NEXT:    tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v8f64_v8i8:
@@ -4125,29 +4233,29 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    fmov s0, w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d16
+; CHECK-SD-NEXT:    mov d16, v2.d[1]
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
 ; CHECK-SD-NEXT:    fcvtzu w9, d1
-; CHECK-SD-NEXT:    mov d1, v2.d[1]
 ; CHECK-SD-NEXT:    cmp w10, #255
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    mov w11, v0.s[1]
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
-; CHECK-SD-NEXT:    fmov s16, w9
-; CHECK-SD-NEXT:    fcvtzu w9, d1
-; CHECK-SD-NEXT:    mov d1, v3.d[1]
+; CHECK-SD-NEXT:    fmov s1, w9
+; CHECK-SD-NEXT:    fcvtzu w9, d16
+; CHECK-SD-NEXT:    mov d16, v3.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[1], w11
-; CHECK-SD-NEXT:    mov v16.s[1], w10
+; CHECK-SD-NEXT:    mov v1.s[1], w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d2
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov w11, v16.s[1]
-; CHECK-SD-NEXT:    mov v0.b[2], v16.b[0]
+; CHECK-SD-NEXT:    mov w11, v1.s[1]
+; CHECK-SD-NEXT:    mov v0.b[2], v1.b[0]
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    fmov s2, w10
-; CHECK-SD-NEXT:    fcvtzu w10, d1
-; CHECK-SD-NEXT:    mov d1, v4.d[1]
+; CHECK-SD-NEXT:    fcvtzu w10, d16
+; CHECK-SD-NEXT:    mov d16, v4.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[3], w11
 ; CHECK-SD-NEXT:    mov v2.s[1], w9
 ; CHECK-SD-NEXT:    fcvtzu w9, d3
@@ -4157,58 +4265,58 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) {
 ; CHECK-SD-NEXT:    mov w11, v2.s[1]
 ; CHECK-SD-NEXT:    mov v0.b[4], v2.b[0]
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
-; CHECK-SD-NEXT:    fmov s2, w9
-; CHECK-SD-NEXT:    fcvtzu w9, d1
-; CHECK-SD-NEXT:    mov d1, v5.d[1]
+; CHECK-SD-NEXT:    fmov s3, w9
+; CHECK-SD-NEXT:    fcvtzu w9, d16
+; CHECK-SD-NEXT:    mov d16, v5.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[5], w11
-; CHECK-SD-NEXT:    mov v2.s[1], w10
+; CHECK-SD-NEXT:    mov v3.s[1], w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d4
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov w11, v2.s[1]
-; CHECK-SD-NEXT:    mov v0.b[6], v2.b[0]
+; CHECK-SD-NEXT:    mov w11, v3.s[1]
+; CHECK-SD-NEXT:    mov v0.b[6], v3.b[0]
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
-; CHECK-SD-NEXT:    fmov s2, w10
-; CHECK-SD-NEXT:    fcvtzu w10, d1
-; CHECK-SD-NEXT:    mov d1, v6.d[1]
+; CHECK-SD-NEXT:    fmov s4, w10
+; CHECK-SD-NEXT:    fcvtzu w10, d16
 ; CHECK-SD-NEXT:    mov v0.b[7], w11
-; CHECK-SD-NEXT:    mov v2.s[1], w9
+; CHECK-SD-NEXT:    mov v4.s[1], w9
 ; CHECK-SD-NEXT:    fcvtzu w9, d5
+; CHECK-SD-NEXT:    mov d5, v6.d[1]
 ; CHECK-SD-NEXT:    cmp w10, #255
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    cmp w9, #255
-; CHECK-SD-NEXT:    mov w11, v2.s[1]
-; CHECK-SD-NEXT:    mov v0.b[8], v2.b[0]
+; CHECK-SD-NEXT:    mov w11, v4.s[1]
+; CHECK-SD-NEXT:    mov v0.b[8], v4.b[0]
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
-; CHECK-SD-NEXT:    fmov s2, w9
-; CHECK-SD-NEXT:    fcvtzu w9, d1
-; CHECK-SD-NEXT:    mov d1, v7.d[1]
+; CHECK-SD-NEXT:    fmov s16, w9
+; CHECK-SD-NEXT:    fcvtzu w9, d5
+; CHECK-SD-NEXT:    mov d5, v7.d[1]
 ; CHECK-SD-NEXT:    mov v0.b[9], w11
-; CHECK-SD-NEXT:    mov v2.s[1], w10
+; CHECK-SD-NEXT:    mov v16.s[1], w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d6
 ; CHECK-SD-NEXT:    cmp w9, #255
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
-; CHECK-SD-NEXT:    mov v0.b[10], v2.b[0]
-; CHECK-SD-NEXT:    mov w11, v2.s[1]
+; CHECK-SD-NEXT:    mov v0.b[10], v16.b[0]
+; CHECK-SD-NEXT:    mov w11, v16.s[1]
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
-; CHECK-SD-NEXT:    fmov s2, w10
+; CHECK-SD-NEXT:    fmov s6, w10
 ; CHECK-SD-NEXT:    fcvtzu w10, d7
 ; CHECK-SD-NEXT:    mov v0.b[11], w11
-; CHECK-SD-NEXT:    mov v2.s[1], w9
-; CHECK-SD-NEXT:    fcvtzu w9, d1
+; CHECK-SD-NEXT:    mov v6.s[1], w9
+; CHECK-SD-NEXT:    fcvtzu w9, d5
 ; CHECK-SD-NEXT:    cmp w9, #255
-; CHECK-SD-NEXT:    mov v0.b[12], v2.b[0]
-; CHECK-SD-NEXT:    mov w11, v2.s[1]
+; CHECK-SD-NEXT:    mov v0.b[12], v6.b[0]
+; CHECK-SD-NEXT:    mov w11, v6.s[1]
 ; CHECK-SD-NEXT:    csel w9, w9, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, #255
 ; CHECK-SD-NEXT:    csel w8, w10, w8, lo
-; CHECK-SD-NEXT:    fmov s1, w8
+; CHECK-SD-NEXT:    fmov s5, w8
 ; CHECK-SD-NEXT:    mov v0.b[13], w11
-; CHECK-SD-NEXT:    mov v1.s[1], w9
-; CHECK-SD-NEXT:    mov v0.b[14], v1.b[0]
-; CHECK-SD-NEXT:    mov w8, v1.s[1]
+; CHECK-SD-NEXT:    mov v5.s[1], w9
+; CHECK-SD-NEXT:    mov v0.b[14], v5.b[0]
+; CHECK-SD-NEXT:    mov w8, v5.s[1]
 ; CHECK-SD-NEXT:    mov v0.b[15], w8
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4262,8 +4370,8 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzu w12, d2
 ; CHECK-SD-NEXT:    fcvtzu w14, d1
 ; CHECK-SD-NEXT:    fcvtzu w8, d4
-; CHECK-SD-NEXT:    fcvtzu w11, d5
 ; CHECK-SD-NEXT:    mov d4, v0.d[1]
+; CHECK-SD-NEXT:    fcvtzu w11, d5
 ; CHECK-SD-NEXT:    fcvtzu w13, d3
 ; CHECK-SD-NEXT:    cmp w8, w10
 ; CHECK-SD-NEXT:    fcvtzu w15, d4
@@ -4271,29 +4379,29 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) {
 ; CHECK-SD-NEXT:    cmp w9, w10
 ; CHECK-SD-NEXT:    csel w9, w9, w10, lo
 ; CHECK-SD-NEXT:    cmp w11, w10
-; CHECK-SD-NEXT:    fmov s3, w9
+; CHECK-SD-NEXT:    fmov s4, w9
 ; CHECK-SD-NEXT:    csel w9, w11, w10, lo
 ; CHECK-SD-NEXT:    cmp w12, w10
 ; CHECK-SD-NEXT:    fcvtzu w11, d0
-; CHECK-SD-NEXT:    mov v3.s[1], w8
+; CHECK-SD-NEXT:    mov v4.s[1], w8
 ; CHECK-SD-NEXT:    csel w8, w12, w10, lo
 ; CHECK-SD-NEXT:    cmp w13, w10
-; CHECK-SD-NEXT:    fmov s2, w8
+; CHECK-SD-NEXT:    fmov s3, w8
 ; CHECK-SD-NEXT:    csel w8, w13, w10, lo
 ; CHECK-SD-NEXT:    cmp w14, w10
-; CHECK-SD-NEXT:    mov v2.s[1], w9
+; CHECK-SD-NEXT:    mov v3.s[1], w9
 ; CHECK-SD-NEXT:    csel w9, w14, w10, lo
 ; CHECK-SD-NEXT:    cmp w15, w10
-; CHECK-SD-NEXT:    fmov s1, w9
+; CHECK-SD-NEXT:    fmov s2, w9
 ; CHECK-SD-NEXT:    csel w9, w15, w10, lo
 ; CHECK-SD-NEXT:    cmp w11, w10
-; CHECK-SD-NEXT:    mov v1.s[1], w8
+; CHECK-SD-NEXT:    mov v2.s[1], w8
 ; CHECK-SD-NEXT:    csel w8, w11, w10, lo
-; CHECK-SD-NEXT:    fmov s0, w8
+; CHECK-SD-NEXT:    fmov s1, w8
 ; CHECK-SD-NEXT:    adrp x8, .LCPI84_0
-; CHECK-SD-NEXT:    ldr q4, [x8, :lo12:.LCPI84_0]
-; CHECK-SD-NEXT:    mov v0.s[1], w9
-; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-SD-NEXT:    ldr q0, [x8, :lo12:.LCPI84_0]
+; CHECK-SD-NEXT:    mov v1.s[1], w9
+; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v8f64_v8i16:
@@ -4352,53 +4460,53 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) {
 ; CHECK-SD-NEXT:    fcvtzu w16, d0
 ; CHECK-SD-NEXT:    csel w11, w11, w8, lo
 ; CHECK-SD-NEXT:    cmp w17, w8
-; CHECK-SD-NEXT:    fmov s18, w11
 ; CHECK-SD-NEXT:    mov v19.s[1], w13
 ; CHECK-SD-NEXT:    csel w13, w17, w8, lo
 ; CHECK-SD-NEXT:    cmp w10, w8
 ; CHECK-SD-NEXT:    csel w10, w10, w8, lo
 ; CHECK-SD-NEXT:    cmp w18, w8
-; CHECK-SD-NEXT:    fcvtzu w17, d2
+; CHECK-SD-NEXT:    fmov s18, w11
 ; CHECK-SD-NEXT:    csel w11, w18, w8, lo
 ; CHECK-SD-NEXT:    cmp w12, w8
-; CHECK-SD-NEXT:    mov v18.s[1], w9
+; CHECK-SD-NEXT:    fcvtzu w17, d2
 ; CHECK-SD-NEXT:    csel w12, w12, w8, lo
 ; CHECK-SD-NEXT:    cmp w16, w8
-; CHECK-SD-NEXT:    fmov s17, w10
+; CHECK-SD-NEXT:    fcvtzu w18, d6
+; CHECK-SD-NEXT:    mov v18.s[1], w9
 ; CHECK-SD-NEXT:    csel w9, w16, w8, lo
 ; CHECK-SD-NEXT:    cmp w14, w8
-; CHECK-SD-NEXT:    fcvtzu w16, d5
+; CHECK-SD-NEXT:    fmov s17, w10
 ; CHECK-SD-NEXT:    csel w10, w14, w8, lo
-; CHECK-SD-NEXT:    fcvtzu w18, d6
+; CHECK-SD-NEXT:    fcvtzu w16, d5
+; CHECK-SD-NEXT:    fmov s23, w10
 ; CHECK-SD-NEXT:    cmp w17, w8
-; CHECK-SD-NEXT:    fmov s5, w10
+; CHECK-SD-NEXT:    fcvtzu w14, d3
 ; CHECK-SD-NEXT:    csel w10, w17, w8, lo
 ; CHECK-SD-NEXT:    cmp w15, w8
-; CHECK-SD-NEXT:    fcvtzu w14, d3
 ; CHECK-SD-NEXT:    fcvtzu w17, d4
-; CHECK-SD-NEXT:    fmov s16, w12
 ; CHECK-SD-NEXT:    mov v17.s[1], w13
-; CHECK-SD-NEXT:    mov v5.s[1], w9
+; CHECK-SD-NEXT:    mov v23.s[1], w9
 ; CHECK-SD-NEXT:    csel w9, w15, w8, lo
 ; CHECK-SD-NEXT:    cmp w18, w8
-; CHECK-SD-NEXT:    fmov s4, w9
+; CHECK-SD-NEXT:    fmov s22, w9
 ; CHECK-SD-NEXT:    csel w9, w18, w8, lo
 ; CHECK-SD-NEXT:    cmp w16, w8
-; CHECK-SD-NEXT:    mov v16.s[1], w11
-; CHECK-SD-NEXT:    mov v4.s[1], w10
+; CHECK-SD-NEXT:    fmov s16, w12
+; CHECK-SD-NEXT:    mov v22.s[1], w10
 ; CHECK-SD-NEXT:    csel w10, w16, w8, lo
 ; CHECK-SD-NEXT:    cmp w14, w8
-; CHECK-SD-NEXT:    fmov s3, w10
+; CHECK-SD-NEXT:    fmov s21, w10
 ; CHECK-SD-NEXT:    csel w10, w14, w8, lo
 ; CHECK-SD-NEXT:    cmp w17, w8
 ; CHECK-SD-NEXT:    csel w8, w17, w8, lo
-; CHECK-SD-NEXT:    fmov s2, w8
+; CHECK-SD-NEXT:    mov v16.s[1], w11
+; CHECK-SD-NEXT:    mov v21.s[1], w9
+; CHECK-SD-NEXT:    fmov s20, w8
 ; CHECK-SD-NEXT:    adrp x8, .LCPI85_0
-; CHECK-SD-NEXT:    mov v3.s[1], w9
 ; CHECK-SD-NEXT:    ldr q1, [x8, :lo12:.LCPI85_0]
-; CHECK-SD-NEXT:    mov v2.s[1], w10
+; CHECK-SD-NEXT:    mov v20.s[1], w10
 ; CHECK-SD-NEXT:    tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: test_unsigned_v16f64_v16i16:

diff  --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll
index 84c008d34d5f9b..2187717c4148ae 100644
--- a/llvm/test/CodeGen/AArch64/fptrunc.ll
+++ b/llvm/test/CodeGen/AArch64/fptrunc.ll
@@ -97,12 +97,15 @@ define <2 x half> @fptrunc_v2f128_v2f16(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfhf2
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfhf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -117,9 +120,11 @@ define <2 x half> @fptrunc_v2f128_v2f16(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    mov v2.d[1], x8
 ; CHECK-GI-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfhf2
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfhf2
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfhf2
@@ -128,6 +133,7 @@ define <2 x half> @fptrunc_v2f128_v2f16(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #64
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -146,12 +152,15 @@ define <2 x float> @fptrunc_v2f128_v2f32(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfsf2
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfsf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -163,10 +172,12 @@ define <2 x float> @fptrunc_v2f128_v2f32(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfsf2
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfsf2
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
 ; CHECK-GI-NEXT:    fmov d0, d1
@@ -188,10 +199,12 @@ define <2 x double> @fptrunc_v2f128_v2f64(<2 x fp128> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
 ; CHECK-SD-NEXT:    bl __trunctfdf2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    bl __trunctfdf2
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -205,10 +218,12 @@ define <2 x double> @fptrunc_v2f128_v2f64(<2 x fp128> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __trunctfdf2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    bl __trunctfdf2
 ; CHECK-GI-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v1.16b
@@ -232,6 +247,9 @@ entry:
 define <3 x float> @fptrunc_v3f64_v3f32(<3 x double> %a) {
 ; CHECK-SD-LABEL: fptrunc_v3f64_v3f32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-SD-NEXT:    fcvtn2 v0.4s, v2.2d
@@ -239,8 +257,10 @@ define <3 x float> @fptrunc_v3f64_v3f32(<3 x double> %a) {
 ;
 ; CHECK-GI-LABEL: fptrunc_v3f64_v3f32:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    fcvt s2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fcvtn v1.2s, v0.2d
 ; CHECK-GI-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[1]
@@ -275,6 +295,7 @@ define <2 x half> @fptrunc_v2f64_v2f16(<2 x double> %a) {
 ; CHECK-GI-NEXT:    fcvt h0, d0
 ; CHECK-GI-NEXT:    fcvt h1, d1
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptrunc <2 x double> %a to <2 x half>
@@ -289,6 +310,7 @@ define <3 x half> @fptrunc_v3f64_v3f16(<3 x double> %a) {
 ; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-SD-NEXT:    fcvt h1, d2
 ; CHECK-SD-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptrunc_v3f64_v3f16:
@@ -298,6 +320,7 @@ define <3 x half> @fptrunc_v3f64_v3f16(<3 x double> %a) {
 ; CHECK-GI-NEXT:    fcvt h2, d2
 ; CHECK-GI-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptrunc <3 x double> %a to <3 x half>
@@ -323,6 +346,7 @@ define <4 x half> @fptrunc_v4f64_v4f16(<4 x double> %a) {
 ; CHECK-GI-NEXT:    fcvt h2, d3
 ; CHECK-GI-NEXT:    mov v0.h[2], v1.h[0]
 ; CHECK-GI-NEXT:    mov v0.h[3], v2.h[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = fptrunc <4 x double> %a to <4 x half>
@@ -332,11 +356,13 @@ entry:
 define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
 ; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov v1.s[0], v0.s[0]
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[1]
 ; CHECK-GI-NEXT:    fcvtn v0.4h, v1.4s

diff  --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index eb8f961b369943..6efd9f40f00688 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -243,8 +243,9 @@ define <3 x i64> @freeze_v3i64() {
 ; CHECK-SD-LABEL: freeze_v3i64:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    add v0.2d, v0.2d, v0.2d
-; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
 ; CHECK-SD-NEXT:    fmov d2, d0
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: freeze_v3i64:
@@ -253,6 +254,7 @@ define <3 x i64> @freeze_v3i64() {
 ; CHECK-GI-NEXT:    add x8, x8, x8
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %y1 = freeze <3 x i64> undef
   %t1 = add <3 x i64> %y1, %y1
@@ -295,8 +297,10 @@ define <3 x ptr> @freeze_v3p0() {
 ; CHECK-SD-NEXT:    mov w8, #4 // =0x4
 ; CHECK-SD-NEXT:    dup v2.2d, x8
 ; CHECK-SD-NEXT:    add v0.2d, v0.2d, v2.2d
-; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
 ; CHECK-SD-NEXT:    add d2, d0, d2
+; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: freeze_v3p0:

diff  --git a/llvm/test/CodeGen/AArch64/frem-power2.ll b/llvm/test/CodeGen/AArch64/frem-power2.ll
index 29a86fd5b27b80..98276b68481a10 100644
--- a/llvm/test/CodeGen/AArch64/frem-power2.ll
+++ b/llvm/test/CodeGen/AArch64/frem-power2.ll
@@ -6,12 +6,14 @@ define float @frem2(float %x) {
 ; CHECK-SD-LABEL: frem2:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fmov s1, #0.50000000
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    fmov s2, #-2.00000000
 ; CHECK-SD-NEXT:    fmul s1, s0, s1
 ; CHECK-SD-NEXT:    frintz s1, s1
 ; CHECK-SD-NEXT:    fmadd s1, s1, s2, s0
 ; CHECK-SD-NEXT:    mvni v2.4s, #128, lsl #24
 ; CHECK-SD-NEXT:    bit v0.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: frem2:
@@ -364,20 +366,25 @@ define <4 x float> @frem2_vec(<4 x float> %x) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, #2.00000000
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    fmov s1, #2.00000000
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    fmov s1, #2.00000000
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -418,20 +425,25 @@ define <4 x float> @frem2_nsz_vec(<4 x float> %x) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, #2.00000000
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    fmov s1, #2.00000000
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    fmov s1, #2.00000000
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -479,20 +491,25 @@ define <4 x float> @frem1152921504606846976_absv(<4 x float> %x) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload

diff  --git a/llvm/test/CodeGen/AArch64/frem.ll b/llvm/test/CodeGen/AArch64/frem.ll
index a08178dada813b..feb13da64cbf8a 100644
--- a/llvm/test/CodeGen/AArch64/frem.ll
+++ b/llvm/test/CodeGen/AArch64/frem.ll
@@ -73,10 +73,14 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl fmod
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl fmod
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #64
@@ -93,12 +97,16 @@ define <2 x double> @frem_v2f64(<2 x double> %a, <2 x double> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    bl fmod
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    fmov d1, d9
+; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl fmod
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -199,18 +207,25 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #48] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d1, v2.d[1]
 ; CHECK-SD-NEXT:    bl fmod
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl fmod
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    mov d1, v1.d[1]
 ; CHECK-SD-NEXT:    bl fmod
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bl fmod
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp, #16] // 32-byte Folded Reload
@@ -238,28 +253,35 @@ define <4 x double> @frem_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d10, v2.d[1]
 ; CHECK-GI-NEXT:    mov d11, v3.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    mov d9, v4.d[1]
 ; CHECK-GI-NEXT:    bl fmod
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    fmov d1, d10
+; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl fmod
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl fmod
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    fmov d1, d11
+; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl fmod
-; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp q3, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    ldr q2, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    fmov d2, d1
-; CHECK-GI-NEXT:    ldp q3, q1, [sp, #16] // 32-byte Folded Reload
-; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
+; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #112
 ; CHECK-GI-NEXT:    ret
@@ -275,16 +297,23 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -297,14 +326,20 @@ define <2 x float> @frem_v2f32(<2 x float> %a, <2 x float> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -16
 ; CHECK-GI-NEXT:    .cfi_offset b8, -24
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, s9
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #16] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -327,10 +362,14 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -338,6 +377,7 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -358,18 +398,23 @@ define <3 x float> @frem_v3f32(<3 x float> %a, <3 x float> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b11, -48
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s10, v1.s[1]
 ; CHECK-GI-NEXT:    mov s11, v1.s[2]
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, s10
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
@@ -394,10 +439,14 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -405,6 +454,7 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -412,6 +462,7 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[3]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -437,22 +488,28 @@ define <4 x float> @frem_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    fmov s1, s12
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    fmov s1, s13
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
@@ -481,10 +538,14 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    stp q1, q3, [sp, #32] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s1, v2.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
@@ -492,6 +553,7 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp] // 32-byte Folded Reload
@@ -499,16 +561,21 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[3]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #64] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    mov s1, v1.s[1]
 ; CHECK-SD-NEXT:    bl fmodf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
@@ -516,6 +583,7 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-SD-NEXT:    mov s1, v1.s[2]
 ; CHECK-SD-NEXT:    bl fmodf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldp q0, q1, [sp, #32] // 32-byte Folded Reload
@@ -553,6 +621,7 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v3.s[1]
 ; CHECK-GI-NEXT:    mov s11, v3.s[2]
 ; CHECK-GI-NEXT:    mov s2, v4.s[1]
@@ -561,39 +630,50 @@ define <8 x float> @frem_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-GI-NEXT:    mov s14, v1.s[1]
 ; CHECK-GI-NEXT:    mov s15, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    str s2, [sp, #48] // 4-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s2, v4.s[2]
 ; CHECK-GI-NEXT:    str s2, [sp, #112] // 4-byte Folded Spill
 ; CHECK-GI-NEXT:    mov s2, v3.s[3]
 ; CHECK-GI-NEXT:    stp s2, s5, [sp, #200] // 8-byte Folded Spill
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    fmov s1, s14
+; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    fmov s1, s15
+; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
-; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    fmov s1, s13
+; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    fmov s1, s12
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #48] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    fmov s1, s11
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr s0, [sp, #112] // 4-byte Folded Reload
 ; CHECK-GI-NEXT:    bl fmodf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #112] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldp s1, s0, [sp, #200] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    bl fmodf
 ; CHECK-GI-NEXT:    ldp q3, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #176] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #192] // 8-byte Folded Reload
@@ -815,6 +895,8 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h3, v0.h[1]
 ; CHECK-SD-NEXT:    mov h2, v1.h[1]
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #16] // 32-byte Folded Spill
@@ -854,6 +936,7 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #64
 ; CHECK-SD-NEXT:    ret
 ;
@@ -872,6 +955,8 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-GI-NEXT:    .cfi_offset b11, -48
 ; CHECK-GI-NEXT:    .cfi_offset b12, -56
 ; CHECK-GI-NEXT:    .cfi_offset b13, -64
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -909,7 +994,8 @@ define <4 x half> @frem_v4f16(<4 x half> %a, <4 x half> %b) {
 ; CHECK-GI-NEXT:    ldp d13, d12, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #112
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1473,23 +1559,22 @@ define <16 x half> @frem_v16f16(<16 x half> %a, <16 x half> %b) {
 ; CHECK-GI-NEXT:    fcvt s1, h0
 ; CHECK-GI-NEXT:    fmov s0, s2
 ; CHECK-GI-NEXT:    bl fmodf
-; CHECK-GI-NEXT:    ldr q1, [sp, #304] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #304] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp x29, x30, [sp, #432] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov s3, s1
 ; CHECK-GI-NEXT:    ldr q1, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #416] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #400] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #384] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp x29, x30, [sp, #432] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #368] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #416] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #320] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #400] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #128] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #384] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #272] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #368] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[3], v2.h[0]

diff  --git a/llvm/test/CodeGen/AArch64/fsincos.ll b/llvm/test/CodeGen/AArch64/fsincos.ll
index 57e83c04dac4c5..2afc56a7139fbf 100644
--- a/llvm/test/CodeGen/AArch64/fsincos.ll
+++ b/llvm/test/CodeGen/AArch64/fsincos.ll
@@ -68,10 +68,13 @@ define <2 x double> @sin_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl sin
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl sin
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -86,11 +89,14 @@ define <2 x double> @sin_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl sin
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl sin
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -172,17 +178,22 @@ define <4 x double> @sin_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl sin
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl sin
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl sin
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl sin
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -203,23 +214,28 @@ define <4 x double> @sin_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl sin
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl sin
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl sin
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl sin
-; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    fmov d2, d1
-; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -235,15 +251,20 @@ define <2 x float> @sin_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -255,12 +276,16 @@ define <2 x float> @sin_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -282,16 +307,20 @@ define <3 x float> @sin_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -309,14 +338,18 @@ define <3 x float> @sin_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -339,22 +372,27 @@ define <4 x float> @sin_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -375,17 +413,22 @@ define <4 x float> @sin_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -411,37 +454,46 @@ define <8 x float> @sin_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl sinf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl sinf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl sinf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -475,31 +527,41 @@ define <8 x float> @sin_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl sinf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl sinf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -678,6 +740,7 @@ define <4 x half> @sin_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -708,6 +771,7 @@ define <4 x half> @sin_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -722,6 +786,7 @@ define <4 x half> @sin_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -751,7 +816,8 @@ define <4 x half> @sin_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -1174,20 +1240,19 @@ define <16 x half> @sin_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl sinf
-; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov s3, s1
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]
@@ -1305,10 +1370,13 @@ define <2 x double> @cos_v2f64(<2 x double> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl cos
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl cos
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -1323,11 +1391,14 @@ define <2 x double> @cos_v2f64(<2 x double> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl cos
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl cos
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -1409,17 +1480,22 @@ define <4 x double> @cos_v4f64(<4 x double> %a) {
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl cos
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl cos
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov d0, v0.d[1]
 ; CHECK-SD-NEXT:    bl cos
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    bl cos
 ; CHECK-SD-NEXT:    fmov d1, d0
 ; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -1440,23 +1516,28 @@ define <4 x double> @cos_v4f64(<4 x double> %a) {
 ; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    mov d8, v0.d[1]
 ; CHECK-GI-NEXT:    mov d9, v1.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl cos
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d8
 ; CHECK-GI-NEXT:    bl cos
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    bl cos
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov d0, d9
 ; CHECK-GI-NEXT:    bl cos
-; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-GI-NEXT:    fmov d2, d1
-; CHECK-GI-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
+; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
-; CHECK-GI-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
@@ -1472,15 +1553,20 @@ define <2 x float> @cos_v2f32(<2 x float> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1492,12 +1578,16 @@ define <2 x float> @cos_v2f32(<2 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -1519,16 +1609,20 @@ define <3 x float> @cos_v3f32(<3 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -1546,14 +1640,18 @@ define <3 x float> @cos_v3f32(<3 x float> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b9, -32
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
@@ -1576,22 +1674,27 @@ define <4 x float> @cos_v4f32(<4 x float> %a) {
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    mov v0.16b, v1.16b
@@ -1612,17 +1715,22 @@ define <4 x float> @cos_v4f32(<4 x float> %a) {
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -1648,37 +1756,46 @@ define <8 x float> @cos_v8f32(<8 x float> %a) {
 ; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    bl cosf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[1]
 ; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl cosf
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[2]
 ; CHECK-SD-NEXT:    bl cosf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    mov s0, v0.s[3]
@@ -1712,31 +1829,41 @@ define <8 x float> @cos_v8f32(<8 x float> %a) {
 ; CHECK-GI-NEXT:    mov s9, v0.s[2]
 ; CHECK-GI-NEXT:    mov s10, v0.s[3]
 ; CHECK-GI-NEXT:    mov s11, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    mov s12, v1.s[2]
 ; CHECK-GI-NEXT:    mov s13, v1.s[3]
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s8
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s9
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s10
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s11
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s12
 ; CHECK-GI-NEXT:    bl cosf
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s13
 ; CHECK-GI-NEXT:    bl cosf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
@@ -1915,6 +2042,7 @@ define <4 x half> @cos_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    fcvt s0, h1
@@ -1945,6 +2073,7 @@ define <4 x half> @cos_v4f16(<4 x half> %a) {
 ; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1959,6 +2088,7 @@ define <4 x half> @cos_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
 ; CHECK-GI-NEXT:    .cfi_offset b9, -24
 ; CHECK-GI-NEXT:    .cfi_offset b10, -32
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov h8, v0.h[1]
 ; CHECK-GI-NEXT:    mov h9, v0.h[2]
 ; CHECK-GI-NEXT:    mov h10, v0.h[3]
@@ -1988,7 +2118,8 @@ define <4 x half> @cos_v4f16(<4 x half> %a) {
 ; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
-; CHECK-GI-NEXT:    fmov d0, d1
+; CHECK-GI-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    add sp, sp, #80
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -2411,20 +2542,19 @@ define <16 x half> @cos_v16f16(<16 x half> %a) {
 ; CHECK-GI-NEXT:    str q0, [sp, #160] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    fmov s0, s1
 ; CHECK-GI-NEXT:    bl cosf
-; CHECK-GI-NEXT:    ldr q1, [sp, #192] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldr q3, [sp, #192] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr q2, [sp, #112] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp, #304] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    fmov s3, s1
-; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
-; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldp q1, q2, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d9, d8, [sp, #288] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d11, d10, [sp, #272] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #224] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d13, d12, [sp, #256] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #32] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    ldp d15, d14, [sp, #240] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
 ; CHECK-GI-NEXT:    ldr q2, [sp, #208] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v3.h[3], v2.h[0]

diff --git a/llvm/test/CodeGen/AArch64/fsqrt.ll b/llvm/test/CodeGen/AArch64/fsqrt.ll
index 466f1a14c29c46..6c5fd8e52b017c 100644
--- a/llvm/test/CodeGen/AArch64/fsqrt.ll
+++ b/llvm/test/CodeGen/AArch64/fsqrt.ll
@@ -66,18 +66,27 @@ entry:
 define <3 x double> @sqrt_v3f64(<3 x double> %a) {
 ; CHECK-SD-LABEL: sqrt_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    fsqrt v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fsqrt v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sqrt_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    fsqrt d2, d2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    fsqrt v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = call <3 x double> @llvm.sqrt.v3f64(<3 x double> %a)
@@ -216,6 +225,7 @@ entry:
 define <4 x half> @sqrt_v4f16(<4 x half> %a) {
 ; CHECK-SD-NOFP16-LABEL: sqrt_v4f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-SD-NOFP16-NEXT:    mov h3, v0.h[2]
@@ -234,6 +244,7 @@ define <4 x half> @sqrt_v4f16(<4 x half> %a) {
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
 ; CHECK-SD-NOFP16-NEXT:    fcvt h1, s4
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: sqrt_v4f16:

diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
index e5aa360f804c1f..3037a9552bc27e 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -21,6 +21,7 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
 ; CHECK-SD-LABEL: fshl_i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsr w8, w1, #1
+; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsl w10, w0, w2
 ; CHECK-SD-NEXT:    lsr w8, w8, w9
@@ -265,6 +266,7 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
 ; CHECK-SD-LABEL: fshr_i32:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsl w8, w0, #1
+; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsr w10, w1, w2
 ; CHECK-SD-NEXT:    lsl w8, w8, w9
@@ -665,6 +667,7 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-SD-LABEL: or_shl_fshl_simplify:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsr w8, w0, #1
+; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsl w10, w1, w2
 ; CHECK-SD-NEXT:    lsr w8, w8, w9
@@ -693,6 +696,7 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-SD-LABEL: or_lshr_fshr_simplify:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    lsl w8, w0, #1
+; CHECK-SD-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-SD-NEXT:    mvn w9, w2
 ; CHECK-SD-NEXT:    lsr w10, w1, w2
 ; CHECK-SD-NEXT:    lsl w8, w8, w9

diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index bf68b6bc7eaa96..2d84a69f3144e3 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -124,7 +124,9 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SVE-NEXT:    fmov s0, w8
 ; CHECK-SVE-NEXT:    fmov s1, w9
 ; CHECK-SVE-NEXT:    mov v0.s[1], w10
+; CHECK-SVE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SVE-NEXT:    mov v1.s[1], w11
+; CHECK-SVE-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SVE-NEXT:    b use
 ;
 ; CHECK-SVE2p1-LABEL: test_fixed_extract:
@@ -138,7 +140,9 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SVE2p1-NEXT:    fmov s0, w8
 ; CHECK-SVE2p1-NEXT:    fmov s1, w9
 ; CHECK-SVE2p1-NEXT:    mov v0.s[1], w10
+; CHECK-SVE2p1-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SVE2p1-NEXT:    mov v1.s[1], w11
+; CHECK-SVE2p1-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SVE2p1-NEXT:    b use
     %r = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 %i, i64 %n)
     %v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv8i1.i64(<vscale x 8 x i1> %r, i64 0)

diff --git a/llvm/test/CodeGen/AArch64/get_vector_length.ll b/llvm/test/CodeGen/AArch64/get_vector_length.ll
index 73dc32dfe4721f..3a83edb339b848 100644
--- a/llvm/test/CodeGen/AArch64/get_vector_length.ll
+++ b/llvm/test/CodeGen/AArch64/get_vector_length.ll
@@ -33,6 +33,7 @@ define i32 @vector_length_i64(i64 %tc) {
 ; CHECK-NEXT:    cntd x8
 ; CHECK-NEXT:    cmp x0, x8
 ; CHECK-NEXT:    csel x0, x0, x8, lo
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %a = call i32 @llvm.experimental.get.vector.length.i64(i64 %tc, i32 2, i1 true)
   ret i32 %a

diff --git a/llvm/test/CodeGen/AArch64/half.ll b/llvm/test/CodeGen/AArch64/half.ll
index 032b659f30b1b1..bb802033e05fc6 100644
--- a/llvm/test/CodeGen/AArch64/half.ll
+++ b/llvm/test/CodeGen/AArch64/half.ll
@@ -25,6 +25,7 @@ define i16 @test_bitcast_from_half(ptr %addr) {
 define i16 @test_reg_bitcast_from_half(half %in) {
 ; CHECK-LABEL: test_reg_bitcast_from_half:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %val = bitcast half %in to i16
@@ -45,6 +46,7 @@ define half @test_reg_bitcast_to_half(i16 %in) {
 ; CHECK-LABEL: test_reg_bitcast_to_half:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
   %val = bitcast i16 %in to half
   ret half %val

diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
index c3fdc7db2abbee..cb9f04a7fac48a 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -15,6 +15,7 @@
 define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x80
 ; CHECK-NEXT:    cset w0, eq
@@ -28,6 +29,7 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_lowestbit_eq:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -41,6 +43,7 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x18
 ; CHECK-NEXT:    cset w0, eq
@@ -56,6 +59,7 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_signbit_eq:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x8000
 ; CHECK-NEXT:    cset w0, eq
@@ -69,6 +73,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_lowestbit_eq:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -82,6 +87,7 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    tst w8, #0xff0
 ; CHECK-NEXT:    cset w0, eq
@@ -262,6 +268,7 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi
 define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_ne:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w0, w1
 ; CHECK-NEXT:    ubfx w0, w8, #7, #1
 ; CHECK-NEXT:    ret
@@ -318,6 +325,7 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #128 // =0x80
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    and w8, w8, w0
 ; CHECK-NEXT:    cmp w8, #1

diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 4a73b10811d293..32a62453202f40 100644
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -16,6 +16,7 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x80
 ; CHECK-NEXT:    cset w0, eq
@@ -30,6 +31,7 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_lowestbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -44,6 +46,7 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x18
 ; CHECK-NEXT:    cset w0, eq
@@ -60,6 +63,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_signbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x8000
 ; CHECK-NEXT:    cset w0, eq
@@ -74,6 +78,7 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_lowestbit_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0x1
 ; CHECK-NEXT:    cset w0, eq
@@ -88,6 +93,7 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16_bitsinmiddle_eq:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    tst w8, #0xff0
 ; CHECK-NEXT:    cset w0, eq
@@ -266,6 +272,7 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_ne:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsr w8, w8, w1
 ; CHECK-NEXT:    lsr w0, w8, #7
 ; CHECK-NEXT:    ret
@@ -282,7 +289,7 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 ; CHECK-LABEL: scalar_i32_x_is_const_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #43605 // =0xaa55
+; CHECK-NEXT:    mov w8, #43605
 ; CHECK-NEXT:    movk w8, #43605, lsl #16
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    tst w8, #0x1
@@ -296,8 +303,8 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 ; CHECK-LABEL: scalar_i32_x_is_const2_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    mov w9, #43605 // =0xaa55
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w9, #43605
 ; CHECK-NEXT:    lsl w8, w8, w0
 ; CHECK-NEXT:    movk w9, #43605, lsl #16
 ; CHECK-NEXT:    tst w8, w9
@@ -312,7 +319,8 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_bitsinmiddle_slt:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #24 // =0x18
+; CHECK-NEXT:    mov w8, #24
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    and w8, w8, w0
 ; CHECK-NEXT:    ubfx w0, w8, #7, #1
@@ -326,7 +334,8 @@ define i1 @scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind {
 define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-128 // =0xffffff80
+; CHECK-NEXT:    mov w8, #-128
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl w8, w8, w1
 ; CHECK-NEXT:    and w8, w8, w0
 ; CHECK-NEXT:    and w8, w8, #0x80

diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index 189706f6ed4edd..61964060ca2c8b 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -1123,33 +1123,52 @@ entry:
 define <3 x i64> @v3i64_i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %d, <3 x i64> %e) {
 ; CHECK-SD-LABEL: v3i64_i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-SD-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    ldr d16, [sp, #24]
+; CHECK-SD-NEXT:    ldr d17, [sp]
 ; CHECK-SD-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-SD-NEXT:    ldr d16, [sp, #24]
-; CHECK-SD-NEXT:    ldp d1, d4, [sp, #8]
 ; CHECK-SD-NEXT:    mov v6.d[1], v7.d[0]
-; CHECK-SD-NEXT:    ldr d17, [sp]
+; CHECK-SD-NEXT:    ldp d1, d4, [sp, #8]
 ; CHECK-SD-NEXT:    mov v1.d[1], v4.d[0]
 ; CHECK-SD-NEXT:    cmgt v0.2d, v3.2d, v0.2d
 ; CHECK-SD-NEXT:    bsl v0.16b, v6.16b, v1.16b
 ; CHECK-SD-NEXT:    cmgt v1.2d, v5.2d, v2.2d
 ; CHECK-SD-NEXT:    mov v2.16b, v1.16b
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    bsl v2.16b, v17.16b, v16.16b
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: v3i64_i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    // kill: def $d6 killed $d6 def $q6
+; CHECK-GI-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-GI-NEXT:    // kill: def $d7 killed $d7 def $q7
+; CHECK-GI-NEXT:    ldr x8, [sp]
+; CHECK-GI-NEXT:    ldr x10, [sp, #24]
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
-; CHECK-GI-NEXT:    ldr x8, [sp]
-; CHECK-GI-NEXT:    ldp d1, d4, [sp, #8]
 ; CHECK-GI-NEXT:    cmgt v2.2d, v5.2d, v2.2d
+; CHECK-GI-NEXT:    ldp d1, d4, [sp, #8]
 ; CHECK-GI-NEXT:    mov v6.d[1], v7.d[0]
-; CHECK-GI-NEXT:    ldr x10, [sp, #24]
+; CHECK-GI-NEXT:    fmov x9, d2
 ; CHECK-GI-NEXT:    mov v1.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    cmgt v0.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT:    fmov x9, d2
 ; CHECK-GI-NEXT:    sbfx x9, x9, #0, #1
 ; CHECK-GI-NEXT:    bsl v0.16b, v6.16b, v1.16b
 ; CHECK-GI-NEXT:    and x8, x8, x9
@@ -1157,6 +1176,7 @@ define <3 x i64> @v3i64_i64(<3 x i64> %a, <3 x i64> %b, <3 x i64> %d, <3 x i64>
 ; CHECK-GI-NEXT:    orr x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = icmp slt <3 x i64> %a, %b

diff --git a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
index bf20309b5b938b..47aa34e3c01156 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
@@ -32,15 +32,15 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: STRXui $x0, %stack.0, 0 :: (store (s64) into %stack.0)
     ; CHECK-NEXT: dead undef [[COPY:%[0-9]+]].sub_32:gpr64 = COPY $x1
-    ; CHECK-NEXT: dead undef [[DEF:%[0-9]+]].sub_32:gpr64 = IMPLICIT_DEF implicit-def %8
-    ; CHECK-NEXT: STRXui %8, %stack.1, 0 :: (store (s64) into %stack.1)
+    ; CHECK-NEXT: dead undef [[DEF:%[0-9]+]].sub_32:gpr64 = IMPLICIT_DEF implicit-def %6
+    ; CHECK-NEXT: STRXui %6, %stack.1, 0 :: (store (s64) into %stack.1)
     ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
     ; CHECK-NEXT: BL @foo, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
     ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
     ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui %stack.0, 0 :: (load (s64) from %stack.0)
     ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui %stack.1, 0 :: (load (s64) from %stack.1)
     ; CHECK-NEXT: STRXui [[LDRXui1]], [[LDRXui]], 1 :: (store (s64) into stack + 8)
-    ; CHECK-NEXT: STRXui undef %9:gpr64, [[LDRXui]], 0 :: (store (s64) into stack)
+    ; CHECK-NEXT: STRXui undef %8:gpr64, [[LDRXui]], 0 :: (store (s64) into stack)
     ; CHECK-NEXT: RET_ReallyLR
     %0:gpr64sp = COPY $x0
     undef %1.sub_32:gpr64 = COPY $x1
@@ -70,15 +70,15 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: STRXui $x0, %stack.0, 0 :: (store (s64) into %stack.0)
     ; CHECK-NEXT: dead undef [[COPY:%[0-9]+]].sub_32:gpr64 = COPY $x1
-    ; CHECK-NEXT: dead undef [[MOVi32imm:%[0-9]+]].sub_32:gpr64 = MOVi32imm 4, implicit-def %8
-    ; CHECK-NEXT: STRXui %8, %stack.1, 0 :: (store (s64) into %stack.1)
+    ; CHECK-NEXT: dead undef [[MOVi32imm:%[0-9]+]].sub_32:gpr64 = MOVi32imm 4, implicit-def %6
+    ; CHECK-NEXT: STRXui %6, %stack.1, 0 :: (store (s64) into %stack.1)
     ; CHECK-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def dead $sp, implicit $sp
     ; CHECK-NEXT: BL @foo, csr_darwin_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
     ; CHECK-NEXT: ADJCALLSTACKUP 8, 0, implicit-def dead $sp, implicit $sp
     ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui %stack.0, 0 :: (load (s64) from %stack.0)
     ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui %stack.1, 0 :: (load (s64) from %stack.1)
     ; CHECK-NEXT: STRXui [[LDRXui1]], [[LDRXui]], 1 :: (store (s64) into stack + 8)
-    ; CHECK-NEXT: STRXui undef %9:gpr64, [[LDRXui]], 0 :: (store (s64) into stack)
+    ; CHECK-NEXT: STRXui undef %8:gpr64, [[LDRXui]], 0 :: (store (s64) into stack)
     ; CHECK-NEXT: RET_ReallyLR
     %0:gpr64sp = COPY $x0
     undef %1.sub_32:gpr64 = COPY $x1

diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
index f98855dfd992b8..0f208f8ed90524 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
+++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll
@@ -80,6 +80,7 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
 ; CHECK-NEXT:    ; in Loop: Header=BB0_2 Depth=1
 ; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    mov w8, #1 ; =0x1
 ; CHECK-NEXT:    stp xzr, xzr, [sp]
 ; CHECK-NEXT:    stp x8, xzr, [sp, #16]
 ; CHECK-NEXT:    bl _fprintf
@@ -91,6 +92,7 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a
 ; CHECK-NEXT:    ldr x8, [sp, #40] ; 8-byte Folded Reload
 ; CHECK-NEXT:    mov x0, xzr
 ; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    ; kill: def $w8 killed $w8 killed $x8 def $x8
 ; CHECK-NEXT:    str x8, [sp]
 ; CHECK-NEXT:    bl _fprintf
 ; CHECK-NEXT:    brk #0x1

diff --git a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
index a764026d1f81b4..a5d74ef75f0a0a 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
@@ -51,7 +51,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.9(0x01288b01), %bb.5(0x01288b01), %bb.2(0x11f46a91), %bb.6(0x23e8d524), %bb.7(0x47d1aa49)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   dead early-clobber %12:gpr64, dead early-clobber %13:gpr64sp = JumpTableDest32 [[DEF8]], [[DEF7]], %jump-table.0
-  ; CHECK-NEXT:   BR undef [[DEF10]]
+  ; CHECK-NEXT:   BR undef %18:gpr64
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.5:
   ; CHECK-NEXT:   successors: %bb.8(0x80000000)

diff --git a/llvm/test/CodeGen/AArch64/insert-extend.ll b/llvm/test/CodeGen/AArch64/insert-extend.ll
index 11d947816b07e0..851fb0d03e8aa3 100644
--- a/llvm/test/CodeGen/AArch64/insert-extend.ll
+++ b/llvm/test/CodeGen/AArch64/insert-extend.ll
@@ -46,6 +46,8 @@ define <8 x i16> @load4_v4i8_zext_add(float %tmp, ptr %a, ptr %b) {
 define i32 @large(ptr nocapture noundef readonly %p1, i32 noundef %st1, ptr nocapture noundef readonly %p2, i32 noundef %st2) {
 ; CHECK-LABEL: large:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w3 killed $w3 def $x3
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    sxtw x9, w3
 ; CHECK-NEXT:    ldr d0, [x0]

diff --git a/llvm/test/CodeGen/AArch64/insert-subvector.ll b/llvm/test/CodeGen/AArch64/insert-subvector.ll
index a12a8c92605980..6828fa9f1508c8 100644
--- a/llvm/test/CodeGen/AArch64/insert-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/insert-subvector.ll
@@ -47,10 +47,11 @@ define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
 define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) {
 ; CHECK-LABEL: insert_v16i8_4_15:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
 ; CHECK-NEXT:    adrp x8, .LCPI4_0
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT:    mov v3.16b, v1.16b
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   ret <16 x i8> %s2
@@ -90,7 +91,9 @@ define <8 x i8> @insert_v8i8_4_1(float %tmp, <8 x i8> %b, <8 x i8> %a) {
 ; CHECK-LABEL: insert_v8i8_4_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
   ret <8 x i8> %s2
@@ -100,7 +103,9 @@ define <8 x i8> @insert_v8i8_4_2(float %tmp, <8 x i8> %b, <8 x i8> %a) {
 ; CHECK-LABEL: insert_v8i8_4_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
   ret <8 x i8> %s2
@@ -141,10 +146,11 @@ define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
 define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) {
 ; CHECK-LABEL: insert_v8i16_2_15:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
 ; CHECK-NEXT:    adrp x8, .LCPI13_0
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-NEXT:    mov v3.16b, v1.16b
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
   ret <8 x i16> %s2
@@ -184,7 +190,9 @@ define <4 x i16> @insert_v4i16_2_1(float %tmp, <4 x i16> %b, <4 x i16> %a) {
 ; CHECK-LABEL: insert_v4i16_2_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
   ret <4 x i16> %s2
@@ -194,7 +202,9 @@ define <4 x i16> @insert_v4i16_2_2(float %tmp, <4 x i16> %b, <4 x i16> %a) {
 ; CHECK-LABEL: insert_v4i16_2_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x i16> %s2
@@ -262,6 +272,7 @@ define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, ptr %a) {
 define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, ptr %a) {
 ; CHECK-LABEL: load_v16i8_4_15:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
 ; CHECK-NEXT:    adrp x8, .LCPI24_0
 ; CHECK-NEXT:    ldr s0, [x0]
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI24_0]
@@ -313,7 +324,9 @@ define <8 x i8> @load_v8i8_4_1(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-LABEL: load_v8i8_4_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <4 x i8>, ptr %a
   %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -327,6 +340,7 @@ define <8 x i8> @load_v8i8_4_2(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr s2, [x0]
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <4 x i8>, ptr %a
   %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -366,6 +380,7 @@ define <8 x i8> @load_v8i8_2_1(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr h2, [x0]
 ; CHECK-NEXT:    mov v0.h[0], v2.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -377,6 +392,7 @@ define <8 x i8> @load_v8i8_2_15(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-LABEL: load_v8i8_2_15:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    adrp x8, .LCPI33_0
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI33_0]
@@ -394,6 +410,7 @@ define <8 x i8> @load_v8i8_2_2(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr h2, [x0]
 ; CHECK-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -407,6 +424,7 @@ define <8 x i8> @load_v8i8_2_3(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr h2, [x0]
 ; CHECK-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -420,6 +438,7 @@ define <8 x i8> @load_v8i8_2_4(float %tmp, <8 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr h2, [x0]
 ; CHECK-NEXT:    mov v0.h[3], v2.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -431,8 +450,10 @@ define <4 x i8> @load_v4i8_2_1(float %tmp, <4 x i8> %b, ptr %a) {
 ; CHECK-LABEL: load_v4i8_2_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr h0, [x0]
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    zip1 v0.8b, v0.8b, v0.8b
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -447,6 +468,7 @@ define <4 x i8> @load_v4i8_2_2(float %tmp, <4 x i8> %b, ptr %a) {
 ; CHECK-NEXT:    zip1 v2.8b, v0.8b, v0.8b
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i8>, ptr %a
   %s1 = shufflevector <2 x i8> %l, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -471,6 +493,7 @@ define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) {
 define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) {
 ; CHECK-LABEL: load_v8i16_2_15:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
 ; CHECK-NEXT:    adrp x8, .LCPI40_0
 ; CHECK-NEXT:    ldr s0, [x0]
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI40_0]
@@ -522,7 +545,9 @@ define <4 x i16> @load_v4i16_2_1(float %tmp, <4 x i16> %b, ptr %a) {
 ; CHECK-LABEL: load_v4i16_2_1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr s0, [x0]
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i16>, ptr %a
   %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -536,6 +561,7 @@ define <4 x i16> @load_v4i16_2_2(float %tmp, <4 x i16> %b, ptr %a) {
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    ldr s2, [x0]
 ; CHECK-NEXT:    mov v0.s[1], v2.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %l = load <2 x i16>, ptr %a
   %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
@@ -604,6 +630,7 @@ define <8 x i8> @load2_v4i8(float %tmp, ptr %a, ptr %b) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr s0, [x0]
 ; CHECK-NEXT:    ld1 { v0.s }[1], [x1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %la = load <4 x i8>, ptr %a
   %lb = load <4 x i8>, ptr %b

diff --git a/llvm/test/CodeGen/AArch64/insertextract.ll b/llvm/test/CodeGen/AArch64/insertextract.ll
index 8c1840ee788c07..54ee693db1239f 100644
--- a/llvm/test/CodeGen/AArch64/insertextract.ll
+++ b/llvm/test/CodeGen/AArch64/insertextract.ll
@@ -5,6 +5,7 @@
 define <2 x double> @insert_v2f64_0(<2 x double> %a, double %b, i32 %c) {
 ; CHECK-LABEL: insert_v2f64_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[0], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -15,6 +16,7 @@ entry:
 define <2 x double> @insert_v2f64_1(<2 x double> %a, double %b, i32 %c) {
 ; CHECK-LABEL: insert_v2f64_1:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -28,6 +30,7 @@ define <2 x double> @insert_v2f64_c(<2 x double> %a, double %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    str d1, [x8]
@@ -53,9 +56,13 @@ entry:
 define <3 x double> @insert_v3f64_0(<3 x double> %a, double %b, i32 %c) {
 ; CHECK-SD-LABEL: insert_v3f64_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-SD-NEXT:    mov v0.d[0], v3.d[0]
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: insert_v3f64_0:
@@ -80,6 +87,10 @@ entry:
 define <3 x double> @insert_v3f64_c(<3 x double> %a, double %b, i32 %c) {
 ; CHECK-SD-LABEL: insert_v3f64_c:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    stp q0, q2, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
@@ -89,6 +100,8 @@ define <3 x double> @insert_v3f64_c(<3 x double> %a, double %b, i32 %c) {
 ; CHECK-SD-NEXT:    ldr q0, [sp]
 ; CHECK-SD-NEXT:    ldr d2, [sp, #16]
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    add sp, sp, #32
 ; CHECK-SD-NEXT:    ret
 ;
@@ -101,14 +114,19 @@ define <3 x double> @insert_v3f64_c(<3 x double> %a, double %b, i32 %c) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset w29, -16
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov w8, w0
 ; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x3
 ; CHECK-GI-NEXT:    stp q0, q2, [sp]
 ; CHECK-GI-NEXT:    str d3, [x9, x8, lsl #3]
 ; CHECK-GI-NEXT:    ldp q0, q2, [sp]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    mov sp, x29
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ret
@@ -120,6 +138,7 @@ entry:
 define <4 x double> @insert_v4f64_0(<4 x double> %a, double %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f64_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[0], v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -130,6 +149,7 @@ entry:
 define <4 x double> @insert_v4f64_2(<4 x double> %a, double %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f64_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v1.d[0], v2.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -142,6 +162,7 @@ define <4 x double> @insert_v4f64_c(<4 x double> %a, double %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    str d2, [x9, x8, lsl #3]
@@ -174,7 +195,10 @@ entry:
 define <2 x float> @insert_v2f32_0(<2 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v2f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <2 x float> %a, float %b, i32 0
@@ -184,7 +208,10 @@ entry:
 define <2 x float> @insert_v2f32_1(<2 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v2f32_1:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <2 x float> %a, float %b, i32 1
@@ -197,6 +224,7 @@ define <2 x float> @insert_v2f32_c(<2 x float> %a, float %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #1
 ; CHECK-SD-NEXT:    str s1, [x8]
@@ -224,6 +252,7 @@ entry:
 define <3 x float> @insert_v3f32_0(<3 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v3f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v1.s[1], v0.s[1]
 ; CHECK-NEXT:    mov v1.s[2], v0.s[2]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -236,12 +265,14 @@ entry:
 define <3 x float> @insert_v3f32_2(<3 x float> %a, float %b, i32 %c) {
 ; CHECK-SD-LABEL: insert_v3f32_2:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-SD-NEXT:    mov v0.s[2], v1.s[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: insert_v3f32_2:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v2.s[0], v0.s[0]
+; CHECK-GI-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-GI-NEXT:    mov v2.s[1], v0.s[1]
 ; CHECK-GI-NEXT:    mov v2.s[2], v1.s[0]
 ; CHECK-GI-NEXT:    mov v0.16b, v2.16b
@@ -257,6 +288,7 @@ define <3 x float> @insert_v3f32_c(<3 x float> %a, float %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    str s1, [x8]
@@ -282,6 +314,7 @@ entry:
 define <4 x float> @insert_v4f32_0(<4 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -292,6 +325,7 @@ entry:
 define <4 x float> @insert_v4f32_2(<4 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f32_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[2], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -305,6 +339,7 @@ define <4 x float> @insert_v4f32_c(<4 x float> %a, float %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    str s1, [x8]
@@ -330,6 +365,7 @@ entry:
 define <8 x float> @insert_v8f32_0(<8 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v8f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    mov v0.s[0], v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -340,6 +376,7 @@ entry:
 define <8 x float> @insert_v8f32_2(<8 x float> %a, float %b, i32 %c) {
 ; CHECK-LABEL: insert_v8f32_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -352,6 +389,7 @@ define <8 x float> @insert_v8f32_c(<8 x float> %a, float %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x7
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    str s2, [x9, x8, lsl #2]
@@ -384,7 +422,10 @@ entry:
 define <4 x half> @insert_v4f16_0(<4 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <4 x half> %a, half %b, i32 0
@@ -394,7 +435,10 @@ entry:
 define <4 x half> @insert_v4f16_2(<4 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v4f16_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <4 x half> %a, half %b, i32 2
@@ -407,6 +451,7 @@ define <4 x half> @insert_v4f16_c(<4 x half> %a, half %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-SD-NEXT:    str h1, [x8]
@@ -434,6 +479,7 @@ entry:
 define <8 x half> @insert_v8f16_0(<8 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v8f16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -444,6 +490,7 @@ entry:
 define <8 x half> @insert_v8f16_2(<8 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v8f16_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    mov v0.h[2], v1.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -457,6 +504,7 @@ define <8 x half> @insert_v8f16_c(<8 x half> %a, half %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-SD-NEXT:    str h1, [x8]
@@ -482,6 +530,7 @@ entry:
 define <16 x half> @insert_v16f16_0(<16 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v16f16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    mov v0.h[0], v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -492,6 +541,7 @@ entry:
 define <16 x half> @insert_v16f16_2(<16 x half> %a, half %b, i32 %c) {
 ; CHECK-LABEL: insert_v16f16_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $q2
 ; CHECK-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -504,6 +554,7 @@ define <16 x half> @insert_v16f16_c(<16 x half> %a, half %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xf
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    str h2, [x9, x8, lsl #1]
@@ -536,7 +587,9 @@ entry:
 define <8 x i8> @insert_v8i8_0(<8 x i8> %a, i8 %b, i32 %c) {
 ; CHECK-LABEL: insert_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[0], w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <8 x i8> %a, i8 %b, i32 0
@@ -546,7 +599,9 @@ entry:
 define <8 x i8> @insert_v8i8_2(<8 x i8> %a, i8 %b, i32 %c) {
 ; CHECK-LABEL: insert_v8i8_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[2], w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <8 x i8> %a, i8 %b, i32 2
@@ -559,6 +614,7 @@ define <8 x i8> @insert_v8i8_c(<8 x i8> %a, i8 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfxil x8, x1, #0, #3
 ; CHECK-SD-NEXT:    strb w0, [x8]
@@ -611,6 +667,7 @@ define <16 x i8> @insert_v16i8_c(<16 x i8> %a, i8 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfxil x8, x1, #0, #4
 ; CHECK-SD-NEXT:    strb w0, [x8]
@@ -660,6 +717,7 @@ define <32 x i8> @insert_v32i8_c(<32 x i8> %a, i8 %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x8, x1, #0x1f
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    strb w0, [x9, x8]
@@ -694,7 +752,9 @@ entry:
 define <4 x i16> @insert_v4i16_0(<4 x i16> %a, i16 %b, i32 %c) {
 ; CHECK-LABEL: insert_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[0], w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <4 x i16> %a, i16 %b, i32 0
@@ -704,7 +764,9 @@ entry:
 define <4 x i16> @insert_v4i16_2(<4 x i16> %a, i16 %b, i32 %c) {
 ; CHECK-LABEL: insert_v4i16_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[2], w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <4 x i16> %a, i16 %b, i32 2
@@ -717,6 +779,7 @@ define <4 x i16> @insert_v4i16_c(<4 x i16> %a, i16 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x1, #1, #2
 ; CHECK-SD-NEXT:    strh w0, [x8]
@@ -767,6 +830,7 @@ define <8 x i16> @insert_v8i16_c(<8 x i16> %a, i16 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x1, #1, #3
 ; CHECK-SD-NEXT:    strh w0, [x8]
@@ -814,6 +878,7 @@ define <16 x i16> @insert_v16i16_c(<16 x i16> %a, i16 %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x8, x1, #0xf
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    strh w0, [x9, x8, lsl #1]
@@ -846,7 +911,9 @@ entry:
 define <2 x i32> @insert_v2i32_0(<2 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: insert_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[0], w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <2 x i32> %a, i32 %b, i32 0
@@ -856,7 +923,9 @@ entry:
 define <2 x i32> @insert_v2i32_1(<2 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: insert_v2i32_1:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = insertelement <2 x i32> %a, i32 %b, i32 1
@@ -869,6 +938,7 @@ define <2 x i32> @insert_v2i32_c(<2 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x1, #2, #1
 ; CHECK-SD-NEXT:    str w0, [x8]
@@ -938,6 +1008,7 @@ define <3 x i32> @insert_v3i32_c(<3 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-SD-NEXT:    str w0, [x8]
@@ -986,6 +1057,7 @@ define <4 x i32> @insert_v4i32_c(<4 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x1, #2, #2
 ; CHECK-SD-NEXT:    str w0, [x8]
@@ -1033,6 +1105,7 @@ define <8 x i32> @insert_v8i32_c(<8 x i32> %a, i32 %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x8, x1, #0x7
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    str w0, [x9, x8, lsl #2]
@@ -1088,6 +1161,7 @@ define <2 x i64> @insert_v2i64_c(<2 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x1, #3, #1
 ; CHECK-SD-NEXT:    str x0, [x8]
@@ -1113,9 +1187,12 @@ entry:
 define <3 x i64> @insert_v3i64_0(<3 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-SD-LABEL: insert_v3i64_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    mov v0.d[0], x0
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: insert_v3i64_0:
@@ -1140,6 +1217,10 @@ entry:
 define <3 x i64> @insert_v3i64_c(<3 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-SD-LABEL: insert_v3i64_c:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    stp q0, q2, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
@@ -1149,6 +1230,8 @@ define <3 x i64> @insert_v3i64_c(<3 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-SD-NEXT:    ldr q0, [sp]
 ; CHECK-SD-NEXT:    ldr d2, [sp, #16]
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    add sp, sp, #32
 ; CHECK-SD-NEXT:    ret
 ;
@@ -1161,14 +1244,19 @@ define <3 x i64> @insert_v3i64_c(<3 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset w29, -16
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov w8, w1
 ; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x3
 ; CHECK-GI-NEXT:    stp q0, q2, [sp]
 ; CHECK-GI-NEXT:    str x0, [x9, x8, lsl #3]
 ; CHECK-GI-NEXT:    ldp q0, q2, [sp]
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    mov sp, x29
 ; CHECK-GI-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ret
@@ -1202,6 +1290,7 @@ define <4 x i64> @insert_v4i64_c(<4 x i64> %a, i64 %b, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    and x8, x1, #0x3
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    str x0, [x9, x8, lsl #3]
@@ -1234,6 +1323,7 @@ entry:
 define double @extract_v2f64_0(<2 x double> %a, i32 %c) {
 ; CHECK-LABEL: extract_v2f64_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <2 x double> %a, i32 0
@@ -1256,6 +1346,7 @@ define double @extract_v2f64_c(<2 x double> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    ldr d0, [x8]
@@ -1300,8 +1391,12 @@ entry:
 define double @extract_v3f64_c(<3 x double> %a, i32 %c) {
 ; CHECK-SD-LABEL: extract_v3f64_c:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    and x8, x0, #0x3
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    stp q0, q2, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    mov x9, sp
@@ -1318,9 +1413,12 @@ define double @extract_v3f64_c(<3 x double> %a, i32 %c) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset w29, -16
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x3
 ; CHECK-GI-NEXT:    stp q0, q2, [sp]
 ; CHECK-GI-NEXT:    ldr d0, [x9, x8, lsl #3]
@@ -1335,6 +1433,7 @@ entry:
 define double @extract_v4f64_0(<4 x double> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4f64_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <4 x double> %a, i32 0
@@ -1345,6 +1444,7 @@ define double @extract_v4f64_2(<4 x double> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4f64_2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <4 x double> %a, i32 2
@@ -1356,6 +1456,7 @@ define double @extract_v4f64_c(<4 x double> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldr d0, [x9, x8, lsl #3]
@@ -1385,9 +1486,16 @@ entry:
 }
 
 define float @extract_v2f32_0(<2 x float> %a, i32 %c) {
-; CHECK-LABEL: extract_v2f32_0:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: extract_v2f32_0:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2f32_0:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-GI-NEXT:    ret
 entry:
   %d = extractelement <2 x float> %a, i32 0
   ret float %d
@@ -1396,6 +1504,7 @@ entry:
 define float @extract_v2f32_1(<2 x float> %a, i32 %c) {
 ; CHECK-LABEL: extract_v2f32_1:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -1409,6 +1518,7 @@ define float @extract_v2f32_c(<2 x float> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #1
 ; CHECK-SD-NEXT:    ldr s0, [x8]
@@ -1434,6 +1544,7 @@ entry:
 define float @extract_v3f32_0(<3 x float> %a, i32 %c) {
 ; CHECK-LABEL: extract_v3f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <3 x float> %a, i32 0
@@ -1456,6 +1567,7 @@ define float @extract_v3f32_c(<3 x float> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    ldr s0, [x8]
@@ -1481,6 +1593,7 @@ entry:
 define float @extract_v4f32_0(<4 x float> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <4 x float> %a, i32 0
@@ -1503,6 +1616,7 @@ define float @extract_v4f32_c(<4 x float> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    ldr s0, [x8]
@@ -1528,6 +1642,7 @@ entry:
 define float @extract_v8f32_0(<8 x float> %a, i32 %c) {
 ; CHECK-LABEL: extract_v8f32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <8 x float> %a, i32 0
@@ -1549,6 +1664,7 @@ define float @extract_v8f32_c(<8 x float> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x7
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldr s0, [x9, x8, lsl #2]
@@ -1578,9 +1694,16 @@ entry:
 }
 
 define half @extract_v4f16_0(<4 x half> %a, i32 %c) {
-; CHECK-LABEL: extract_v4f16_0:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: extract_v4f16_0:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v4f16_0:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $h0 killed $h0 killed $d0
+; CHECK-GI-NEXT:    ret
 entry:
   %d = extractelement <4 x half> %a, i32 0
   ret half %d
@@ -1589,6 +1712,7 @@ entry:
 define half @extract_v4f16_2(<4 x half> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4f16_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h0, v0.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -1602,6 +1726,7 @@ define half @extract_v4f16_c(<4 x half> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-SD-NEXT:    ldr h0, [x8]
@@ -1627,6 +1752,7 @@ entry:
 define half @extract_v8f16_0(<8 x half> %a, i32 %c) {
 ; CHECK-LABEL: extract_v8f16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <8 x half> %a, i32 0
@@ -1649,6 +1775,7 @@ define half @extract_v8f16_c(<8 x half> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-SD-NEXT:    ldr h0, [x8]
@@ -1674,6 +1801,7 @@ entry:
 define half @extract_v16f16_0(<16 x half> %a, i32 %c) {
 ; CHECK-LABEL: extract_v16f16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %d = extractelement <16 x half> %a, i32 0
@@ -1695,6 +1823,7 @@ define half @extract_v16f16_c(<16 x half> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xf
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldr h0, [x9, x8, lsl #1]
@@ -1726,6 +1855,7 @@ entry:
 define i8 @extract_v8i8_0(<8 x i8> %a, i32 %c) {
 ; CHECK-LABEL: extract_v8i8_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1736,6 +1866,7 @@ entry:
 define i8 @extract_v8i8_2(<8 x i8> %a, i32 %c) {
 ; CHECK-LABEL: extract_v8i8_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -1749,6 +1880,7 @@ define i8 @extract_v8i8_c(<8 x i8> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfxil x8, x0, #0, #3
 ; CHECK-SD-NEXT:    ldrb w0, [x8]
@@ -1799,6 +1931,7 @@ define i8 @extract_v16i8_c(<16 x i8> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfxil x8, x0, #0, #4
 ; CHECK-SD-NEXT:    ldrb w0, [x8]
@@ -1848,6 +1981,7 @@ define i8 @extract_v32i8_c(<32 x i8> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x1f
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldrb w0, [x9, x8]
@@ -1881,6 +2015,7 @@ entry:
 define i16 @extract_v4i16_0(<4 x i16> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4i16_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -1891,6 +2026,7 @@ entry:
 define i16 @extract_v4i16_2(<4 x i16> %a, i32 %c) {
 ; CHECK-LABEL: extract_v4i16_2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[2]
 ; CHECK-NEXT:    ret
 entry:
@@ -1904,6 +2040,7 @@ define i16 @extract_v4i16_c(<4 x i16> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #2
 ; CHECK-SD-NEXT:    ldrh w0, [x8]
@@ -1952,6 +2089,7 @@ define i16 @extract_v8i16_c(<8 x i16> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #1, #3
 ; CHECK-SD-NEXT:    ldrh w0, [x8]
@@ -1999,6 +2137,7 @@ define i16 @extract_v16i16_c(<16 x i16> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0xf
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldrh w0, [x9, x8, lsl #1]
@@ -2028,10 +2167,16 @@ entry:
 }
 
 define i32 @extract_v2i32_0(<2 x i32> %a, i32 %c) {
-; CHECK-LABEL: extract_v2i32_0:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: extract_v2i32_0:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v2i32_0:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
 entry:
   %d = extractelement <2 x i32> %a, i32 0
   ret i32 %d
@@ -2040,11 +2185,13 @@ entry:
 define i32 @extract_v2i32_1(<2 x i32> %a, i32 %c) {
 ; CHECK-SD-LABEL: extract_v2i32_1:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w0, v0.s[1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: extract_v2i32_1:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s0, v0.s[1]
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    ret
@@ -2059,6 +2206,7 @@ define i32 @extract_v2i32_c(<2 x i32> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    add x8, sp, #8
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str d0, [sp, #8]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #1
 ; CHECK-SD-NEXT:    ldr w0, [x8]
@@ -2113,6 +2261,7 @@ define i32 @extract_v3i32_c(<3 x i32> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    ldr w0, [x8]
@@ -2167,6 +2316,7 @@ define i32 @extract_v4i32_c(<4 x i32> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #2, #2
 ; CHECK-SD-NEXT:    ldr w0, [x8]
@@ -2220,6 +2370,7 @@ define i32 @extract_v8i32_c(<8 x i32> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x7
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldr w0, [x9, x8, lsl #2]
@@ -2280,6 +2431,7 @@ define i64 @extract_v2i64_c(<2 x i64> %a, i32 %c) {
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-SD-NEXT:    mov x8, sp
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    str q0, [sp]
 ; CHECK-SD-NEXT:    bfi x8, x0, #3, #1
 ; CHECK-SD-NEXT:    ldr x0, [x8]
@@ -2303,20 +2455,32 @@ entry:
 }
 
 define i64 @extract_v3i64_0(<3 x i64> %a, i32 %c) {
-; CHECK-LABEL: extract_v3i64_0:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: extract_v3i64_0:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    fmov x0, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v3i64_0:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    ret
 entry:
   %d = extractelement <3 x i64> %a, i32 0
   ret i64 %d
 }
 
 define i64 @extract_v3i64_2(<3 x i64> %a, i32 %c) {
-; CHECK-LABEL: extract_v3i64_2:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov x0, d2
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: extract_v3i64_2:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    fmov x0, d2
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: extract_v3i64_2:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    fmov x0, d2
+; CHECK-GI-NEXT:    ret
 entry:
   %d = extractelement <3 x i64> %a, i32 2
   ret i64 %d
@@ -2325,8 +2489,12 @@ entry:
 define i64 @extract_v3i64_c(<3 x i64> %a, i32 %c) {
 ; CHECK-SD-LABEL: extract_v3i64_c:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    and x8, x0, #0x3
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    stp q0, q2, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    mov x9, sp
@@ -2343,9 +2511,12 @@ define i64 @extract_v3i64_c(<3 x i64> %a, i32 %c) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa w29, 16
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset w29, -16
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov w8, w0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov x9, sp
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    and x8, x8, #0x3
 ; CHECK-GI-NEXT:    stp q0, q2, [sp]
 ; CHECK-GI-NEXT:    ldr x0, [x9, x8, lsl #3]
@@ -2382,6 +2553,7 @@ define i64 @extract_v4i64_c(<4 x i64> %a, i32 %c) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    stp q0, q1, [sp, #-32]!
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-SD-NEXT:    and x8, x0, #0x3
 ; CHECK-SD-NEXT:    mov x9, sp
 ; CHECK-SD-NEXT:    ldr x0, [x9, x8, lsl #3]

diff --git a/llvm/test/CodeGen/AArch64/insertshuffleload.ll b/llvm/test/CodeGen/AArch64/insertshuffleload.ll
index 8c8d392b9a9b31..b97b9b1dcdcf8d 100644
--- a/llvm/test/CodeGen/AArch64/insertshuffleload.ll
+++ b/llvm/test/CodeGen/AArch64/insertshuffleload.ll
@@ -259,6 +259,7 @@ define <8 x i8> @wrongidx_first(ptr %p) {
 ; CHECK-NEXT:    ldur d0, [x0, #1]
 ; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
 ; CHECK-NEXT:    ld1 { v0.b }[7], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 1
   %l1 = load <8 x i8>, ptr %q
@@ -275,6 +276,7 @@ define <8 x i8> @wrong_last(ptr %p) {
 ; CHECK-NEXT:    add x8, x0, #8
 ; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #1
 ; CHECK-NEXT:    ld1 { v0.b }[0], [x8]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 8
   %l1 = load <8 x i8>, ptr %p
@@ -293,6 +295,7 @@ define <8 x i8> @wrong_shuffle(ptr %p) {
 ; CHECK-NEXT:    mov v0.d[1], v0.d[0]
 ; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 1
   %l1 = load <8 x i8>, ptr %q
@@ -345,6 +348,7 @@ define <8 x i8> @wrong_offsetfirst(ptr %p) {
 ; CHECK-NEXT:    ldur d0, [x0, #-1]
 ; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
 ; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 -1
   %l1 = load <8 x i8>, ptr %q
@@ -361,6 +365,7 @@ define <8 x i8> @wrong_offsetlast(ptr %p) {
 ; CHECK-NEXT:    add x8, x0, #7
 ; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #1
 ; CHECK-NEXT:    ld1 { v0.b }[7], [x8]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 7
   %l1 = load <8 x i8>, ptr %p
@@ -378,6 +383,7 @@ define <8 x i8> @storebetween(ptr %p, ptr %r) {
 ; CHECK-NEXT:    strb wzr, [x1]
 ; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
 ; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %q = getelementptr inbounds i8, ptr %p, i32 1
   %l1 = load <8 x i8>, ptr %q

diff --git a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
index 698c77dda1388f..cdf2a962f93226 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-cttz-elts-sve.ll
@@ -145,6 +145,7 @@ define i32 @ctz_nxv2i1(<vscale x 2 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 0)
   ret i32 %res
@@ -156,6 +157,7 @@ define i32 @ctz_nxv2i1_poison(<vscale x 2 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %a, i1 1)
   ret i32 %res
@@ -177,8 +179,10 @@ define i32 @add_i32_ctz_nxv2i1_poison(<vscale x 2 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv2i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -192,6 +196,7 @@ define i32 @ctz_nxv4i1(<vscale x 4 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 0)
   ret i32 %res
@@ -203,6 +208,7 @@ define i32 @ctz_nxv4i1_poison(<vscale x 4 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %a, i1 1)
   ret i32 %res
@@ -224,8 +230,10 @@ define i32 @add_i32_ctz_nxv4i1_poison(<vscale x 4 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv4i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -239,6 +247,7 @@ define i32 @ctz_nxv8i1(<vscale x 8 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 0)
   ret i32 %res
@@ -250,6 +259,7 @@ define i32 @ctz_nxv8i1_poison(<vscale x 8 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %a, i1 1)
   ret i32 %res
@@ -271,8 +281,10 @@ define i32 @add_i32_ctz_nxv8i1_poison(<vscale x 8 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv8i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -286,6 +298,7 @@ define i32 @ctz_nxv16i1(<vscale x 16 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0)
   ret i32 %res
@@ -297,6 +310,7 @@ define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %a) {
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1)
   ret i32 %res
@@ -309,6 +323,7 @@ define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsca
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    cntp x0, p0, p0.b
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cmp = icmp ne <vscale x 16 x i8> %a, %b
   %select = select <vscale x 16 x i1> %pg, <vscale x 16 x i1> %cmp, <vscale x 16 x i1> zeroinitializer
@@ -333,8 +348,10 @@ define i32 @add_i32_ctz_nxv16i1_poison(<vscale x 16 x i1> %a, i32 %b) {
 ; CHECK-LABEL: add_i32_ctz_nxv16i1_poison:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.b
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    brkb p0.b, p1/z, p0.b
 ; CHECK-NEXT:    incp x0, p0.b
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> %a, i1 1)
   %trunc = trunc i64 %res to i32
@@ -354,17 +371,20 @@ define i32 @ctz_v16i1(<16 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v16i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; STREAMING-NEXT:    ptrue p0.b, vl16
+; STREAMING-NEXT:    lsl z0.b, z0.b, #7
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0)
   ret i32 %res
@@ -380,17 +400,20 @@ define i32 @ctz_v16i1_poison(<16 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v16i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; STREAMING-NEXT:    ptrue p0.b, vl16
+; STREAMING-NEXT:    lsl z0.b, z0.b, #7
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1)
   ret i32 %res
@@ -410,8 +433,9 @@ define i64 @add_i64_ctz_v16i1_poison(<16 x i1> %a, i64 %b) {
 ;
 ; STREAMING-LABEL: add_i64_ctz_v16i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; STREAMING-NEXT:    ptrue p0.b, vl16
+; STREAMING-NEXT:    lsl z0.b, z0.b, #7
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
@@ -433,17 +457,20 @@ define i32 @ctz_v8i1(<8 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v8i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; STREAMING-NEXT:    ptrue p0.b, vl8
+; STREAMING-NEXT:    lsl z0.b, z0.b, #7
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 0)
   ret i32 %res
@@ -459,17 +486,20 @@ define i32 @ctz_v8i1_poison(<8 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.b
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v8i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    lsl z0.b, z0.b, #7
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; STREAMING-NEXT:    ptrue p0.b, vl8
+; STREAMING-NEXT:    lsl z0.b, z0.b, #7
 ; STREAMING-NEXT:    ptrue p1.b
 ; STREAMING-NEXT:    asr z0.b, z0.b, #7
 ; STREAMING-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.b
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v8i1(<8 x i1> %a, i1 1)
   ret i32 %res
@@ -485,17 +515,20 @@ define i32 @ctz_v4i1(<4 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.h
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v4i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    lsl z0.h, z0.h, #15
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; STREAMING-NEXT:    ptrue p0.h, vl4
+; STREAMING-NEXT:    lsl z0.h, z0.h, #15
 ; STREAMING-NEXT:    ptrue p1.h
 ; STREAMING-NEXT:    asr z0.h, z0.h, #15
 ; STREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.h
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 0)
   ret i32 %res
@@ -511,17 +544,20 @@ define i32 @ctz_v4i1_poison(<4 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.h
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v4i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    lsl z0.h, z0.h, #15
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; STREAMING-NEXT:    ptrue p0.h, vl4
+; STREAMING-NEXT:    lsl z0.h, z0.h, #15
 ; STREAMING-NEXT:    ptrue p1.h
 ; STREAMING-NEXT:    asr z0.h, z0.h, #15
 ; STREAMING-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.h
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v4i1(<4 x i1> %a, i1 1)
   ret i32 %res
@@ -537,17 +573,20 @@ define i32 @ctz_v2i1(<2 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.s
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v2i1:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    lsl z0.s, z0.s, #31
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; STREAMING-NEXT:    ptrue p0.s, vl2
+; STREAMING-NEXT:    lsl z0.s, z0.s, #31
 ; STREAMING-NEXT:    ptrue p1.s
 ; STREAMING-NEXT:    asr z0.s, z0.s, #31
 ; STREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.s
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 0)
   ret i32 %res
@@ -563,17 +602,20 @@ define i32 @ctz_v2i1_poison(<2 x i1> %a) {
 ; NONSTREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; NONSTREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; NONSTREAMING-NEXT:    cntp x0, p0, p0.s
+; NONSTREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; NONSTREAMING-NEXT:    ret
 ;
 ; STREAMING-LABEL: ctz_v2i1_poison:
 ; STREAMING:       // %bb.0:
-; STREAMING-NEXT:    lsl z0.s, z0.s, #31
+; STREAMING-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; STREAMING-NEXT:    ptrue p0.s, vl2
+; STREAMING-NEXT:    lsl z0.s, z0.s, #31
 ; STREAMING-NEXT:    ptrue p1.s
 ; STREAMING-NEXT:    asr z0.s, z0.s, #31
 ; STREAMING-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; STREAMING-NEXT:    brkb p0.b, p1/z, p0.b
 ; STREAMING-NEXT:    cntp x0, p0, p0.s
+; STREAMING-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-NEXT:    ret
   %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 1)
   ret i32 %res

diff --git a/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll b/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
index 62a660f36a86d7..2cf8621ca066dd 100644
--- a/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
+++ b/llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll
@@ -4,6 +4,7 @@
 define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv16i8_v1i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z1.b, b1
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    ret
@@ -14,6 +15,7 @@ define <vscale x 16 x i1> @match_nxv16i8_v1i8(<vscale x 16 x i8> %op1, <1 x i8>
 define <vscale x 16 x i1> @match_nxv16i8_v2i8(<vscale x 16 x i8> %op1, <2 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv16i8_v2i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov w8, v1.s[1]
 ; CHECK-NEXT:    fmov w9, s1
 ; CHECK-NEXT:    ptrue p1.b
@@ -36,6 +38,7 @@ define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8>
 ; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umov w8, v1.h[1]
 ; CHECK-NEXT:    umov w9, v1.h[0]
 ; CHECK-NEXT:    umov w10, v1.h[2]
@@ -64,6 +67,7 @@ define <vscale x 16 x i1> @match_nxv16i8_v4i8(<vscale x 16 x i8> %op1, <4 x i8>
 define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv16i8_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z1.d, d1
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    ret
@@ -74,6 +78,7 @@ define <vscale x 16 x i1> @match_nxv16i8_v8i8(<vscale x 16 x i8> %op1, <8 x i8>
 define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8> %op2, <vscale x 16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv16i8_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z1.q, q1
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    ret
@@ -84,6 +89,7 @@ define <vscale x 16 x i1> @match_nxv16i8_v16i8(<vscale x 16 x i8> %op1, <16 x i8
 define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_v16i8_v1i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    dup v1.16b, v1.b[0]
 ; CHECK-NEXT:    cmeq v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
@@ -95,6 +101,7 @@ define <16 x i1> @match_v16i8_v1i8(<16 x i8> %op1, <1 x i8> %op2, <16 x i1> %mas
 define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_v16i8_v2i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    dup v3.16b, v1.b[4]
 ; CHECK-NEXT:    dup v1.16b, v1.b[0]
 ; CHECK-NEXT:    cmeq v3.16b, v0.16b, v3.16b
@@ -109,6 +116,7 @@ define <16 x i1> @match_v16i8_v2i8(<16 x i8> %op1, <2 x i8> %op2, <16 x i1> %mas
 define <16 x i1> @match_v16i8_v4i8(<16 x i8> %op1, <4 x i8> %op2, <16 x i1> %mask) #0 {
 ; CHECK-LABEL: match_v16i8_v4i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    dup v3.16b, v1.b[2]
 ; CHECK-NEXT:    dup v4.16b, v1.b[0]
 ; CHECK-NEXT:    dup v5.16b, v1.b[4]
@@ -131,11 +139,14 @@ define <16 x i1> @match_v16i8_v8i8(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mas
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v2.16b, v2.16b, #7
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z1.d, d1
 ; CHECK-NEXT:    cmlt v2.16b, v2.16b, #0
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <8 x i8> %op2, <16 x i1> %mask)
   ret <16 x i1> %r
@@ -146,10 +157,13 @@ define <16 x i1> @match_v16i8_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %m
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v2.16b, v2.16b, #7
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmlt v2.16b, v2.16b, #0
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %r = tail call <16 x i1> @llvm.experimental.vector.match(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask)
   ret <16 x i1> %r
@@ -160,11 +174,14 @@ define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v2.8b, v2.8b, #7
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.d, d1
 ; CHECK-NEXT:    cmlt v2.8b, v2.8b, #0
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask)
   ret <8 x i1> %r
@@ -173,6 +190,7 @@ define <8 x i1> @match_v8i8_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) #
 define <vscale x 8 x i1> @match_nxv8i16_v8i16(<vscale x 8 x i16> %op1, <8 x i16> %op2, <vscale x 8 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv8i16_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z1.q, q1
 ; CHECK-NEXT:    match p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    ret
@@ -185,6 +203,8 @@ define <8 x i1> @match_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) #0
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v2.8h, v2.8b, #0
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    shl v2.8h, v2.8h, #15
 ; CHECK-NEXT:    cmlt v2.8h, v2.8h, #0
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
@@ -203,10 +223,13 @@ define <8 x i1> @match_v8i8_v16i8(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask)
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v2.8b, v2.8b, #7
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmlt v2.8b, v2.8b, #0
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    match p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %r = tail call <8 x i1> @llvm.experimental.vector.match(<8 x i8> %op1, <16 x i8> %op2, <8 x i1> %mask)
   ret <8 x i1> %r
@@ -220,8 +243,10 @@ define <vscale x 16 x i1> @match_nxv16i8_v32i8(<vscale x 16 x i8> %op1, <32 x i8
 ; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z3.b, z1.b[1]
 ; CHECK-NEXT:    mov z4.b, b1
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    mov z5.b, z1.b[2]
 ; CHECK-NEXT:    cmpeq p2.b, p1/z, z0.b, z3.b
@@ -440,6 +465,7 @@ define <vscale x 4 x i1> @match_nxv4xi32_v4i32(<vscale x 4 x i32> %op1, <4 x i32
 ; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.s, z1.s[1]
 ; CHECK-NEXT:    mov z3.s, s1
 ; CHECK-NEXT:    ptrue p1.s
@@ -464,9 +490,10 @@ define <vscale x 4 x i1> @match_nxv4xi32_v4i32(<vscale x 4 x i32> %op1, <4 x i32
 define <vscale x 2 x i1> @match_nxv2xi64_v2i64(<vscale x 2 x i64> %op1, <2 x i64> %op2, <vscale x 2 x i1> %mask) #0 {
 ; CHECK-LABEL: match_nxv2xi64_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.d, z1.d[1]
-; CHECK-NEXT:    mov z1.d, d1
 ; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    mov z1.d, d1
 ; CHECK-NEXT:    cmpeq p2.d, p1/z, z0.d, z2.d
 ; CHECK-NEXT:    cmpeq p1.d, p1/z, z0.d, z1.d
 ; CHECK-NEXT:    sel p1.b, p1, p1.b, p2.b

diff --git a/llvm/test/CodeGen/AArch64/itofp-bf16.ll b/llvm/test/CodeGen/AArch64/itofp-bf16.ll
index b95f32f9095d95..58591b11c184fb 100644
--- a/llvm/test/CodeGen/AArch64/itofp-bf16.ll
+++ b/llvm/test/CodeGen/AArch64/itofp-bf16.ll
@@ -87,6 +87,7 @@ define bfloat @stofp_i64_bf16(i64 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp i64 %a to bfloat
@@ -115,6 +116,7 @@ define bfloat @utofp_i64_bf16(i64 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp i64 %a to bfloat
@@ -133,6 +135,7 @@ define bfloat @stofp_i32_bf16(i32 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp i32 %a to bfloat
@@ -151,6 +154,7 @@ define bfloat @utofp_i32_bf16(i32 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp i32 %a to bfloat
@@ -169,6 +173,7 @@ define bfloat @stofp_i16_bf16(i16 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp i16 %a to bfloat
@@ -187,6 +192,7 @@ define bfloat @utofp_i16_bf16(i16 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp i16 %a to bfloat
@@ -205,6 +211,7 @@ define bfloat @stofp_i8_bf16(i8 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp i8 %a to bfloat
@@ -223,6 +230,7 @@ define bfloat @utofp_i8_bf16(i8 %a) {
 ; CHECK-NEXT:    add w8, w10, w8
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp i8 %a to bfloat
@@ -280,6 +288,7 @@ define <2 x bfloat> @stofp_v2i64_v2bf16(<2 x i64> %a) {
 ; CHECK-NEXT:    lsr w8, w8, #16
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp <2 x i64> %a to <2 x bfloat>
@@ -329,6 +338,7 @@ define <2 x bfloat> @utofp_v2i64_v2bf16(<2 x i64> %a) {
 ; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp <2 x i64> %a to <2 x bfloat>
@@ -338,6 +348,9 @@ entry:
 define <3 x bfloat> @stofp_v3i64_v3bf16(<3 x i64> %a) {
 ; CHECK-LABEL: stofp_v3i64_v3bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    scvtf v1.2d, v2.2d
 ; CHECK-NEXT:    movi v2.4s, #127, msl #8
@@ -362,6 +375,9 @@ entry:
 define <3 x bfloat> @utofp_v3i64_v3bf16(<3 x i64> %a) {
 ; CHECK-LABEL: utofp_v3i64_v3bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ucvtf v1.2d, v2.2d
 ; CHECK-NEXT:    movi v2.4s, #127, msl #8
@@ -836,8 +852,9 @@ entry:
 define <2 x bfloat> @stofp_v2i32_v2bf16(<2 x i32> %a) {
 ; CHECK-LABEL: stofp_v2i32_v2bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    scvtf v0.4s, v0.4s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
 ; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
 ; CHECK-NEXT:    movi v2.4s, #127, msl #8
@@ -852,8 +869,9 @@ entry:
 define <2 x bfloat> @utofp_v2i32_v2bf16(<2 x i32> %a) {
 ; CHECK-LABEL: utofp_v2i32_v2bf16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ucvtf v0.4s, v0.4s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-NEXT:    ushr v2.4s, v0.4s, #16
 ; CHECK-NEXT:    and v1.16b, v2.16b, v1.16b
 ; CHECK-NEXT:    movi v2.4s, #127, msl #8
@@ -1476,6 +1494,7 @@ entry:
 define <2 x bfloat> @stofp_v2i8_v2bf16(<2 x i8> %a) {
 ; CHECK-LABEL: stofp_v2i8_v2bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w9, v0.s[1]
 ; CHECK-NEXT:    fmov w10, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
@@ -1496,6 +1515,7 @@ define <2 x bfloat> @stofp_v2i8_v2bf16(<2 x i8> %a) {
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = sitofp <2 x i8> %a to <2 x bfloat>
@@ -1505,6 +1525,7 @@ entry:
 define <2 x bfloat> @utofp_v2i8_v2bf16(<2 x i8> %a) {
 ; CHECK-LABEL: utofp_v2i8_v2bf16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w9, v0.s[1]
 ; CHECK-NEXT:    fmov w10, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
@@ -1525,6 +1546,7 @@ define <2 x bfloat> @utofp_v2i8_v2bf16(<2 x i8> %a) {
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    fmov s1, w9
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = uitofp <2 x i8> %a to <2 x bfloat>

diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index a7059b6d8768aa..81c1a64f2d434f 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -1081,8 +1081,11 @@ define <3 x fp128> @stofp_v3i64_v3f128(<3 x i64> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x0, d0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatditf
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
@@ -1136,8 +1139,11 @@ define <3 x fp128> @utofp_v3i64_v2f128(<3 x i64> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x0, d0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    str q2, [sp, #32] // 16-byte Folded Spill
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatunditf
 ; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
@@ -1201,10 +1207,12 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floattidf
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floattidf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -1224,9 +1232,11 @@ define <2 x double> @stofp_v2i128_v2f64(<2 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floattidf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floattidf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -1255,10 +1265,12 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floatuntidf
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatuntidf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    add sp, sp, #48
@@ -1278,9 +1290,11 @@ define <2 x double> @utofp_v2i128_v2f64(<2 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floatuntidf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floatuntidf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
@@ -1469,18 +1483,29 @@ entry:
 define <3 x double> @stofp_v3i64_v3f64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: stofp_v3i64_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    scvtf v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: stofp_v3i64_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <3 x i64> %a to <3 x double>
@@ -1490,18 +1515,29 @@ entry:
 define <3 x double> @utofp_v3i64_v3f64(<3 x i64> %a) {
 ; CHECK-SD-LABEL: utofp_v3i64_v3f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ucvtf v2.2d, v2.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: utofp_v3i64_v3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <3 x i64> %a to <3 x double>
@@ -1739,6 +1775,7 @@ define <2 x fp128> @stofp_v2i32_v2f128(<2 x i32> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w0, s0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatsitf
@@ -1760,6 +1797,7 @@ define <2 x fp128> @stofp_v2i32_v2f128(<2 x i32> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    bl __floatsitf
@@ -1784,6 +1822,7 @@ define <2 x fp128> @utofp_v2i32_v2f128(<2 x i32> %a) {
 ; CHECK-SD-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-SD-NEXT:    .cfi_offset w30, -16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w0, s0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatunsitf
@@ -1805,6 +1844,7 @@ define <2 x fp128> @utofp_v2i32_v2f128(<2 x i32> %a) {
 ; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-GI-NEXT:    .cfi_offset w30, -8
 ; CHECK-GI-NEXT:    .cfi_offset b8, -16
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    fmov w0, s0
 ; CHECK-GI-NEXT:    mov s8, v0.s[1]
 ; CHECK-GI-NEXT:    bl __floatunsitf
@@ -1961,8 +2001,10 @@ define <3 x double> @stofp_v3i32_v3f64(<3 x i32> %a) {
 ; CHECK-SD-NEXT:    sshll2 v0.2d, v0.4s, #0
 ; CHECK-SD-NEXT:    scvtf v3.2d, v1.2d
 ; CHECK-SD-NEXT:    scvtf v2.2d, v0.2d
-; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: stofp_v3i32_v3f64:
@@ -1971,6 +2013,7 @@ define <3 x double> @stofp_v3i32_v3f64(<3 x i32> %a) {
 ; CHECK-GI-NEXT:    sshll2 v0.2d, v0.4s, #0
 ; CHECK-GI-NEXT:    scvtf v3.2d, v1.2d
 ; CHECK-GI-NEXT:    scvtf v2.2d, v0.2d
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v3.d[1]
 ; CHECK-GI-NEXT:    fmov d0, d3
 ; CHECK-GI-NEXT:    ret
@@ -1986,8 +2029,10 @@ define <3 x double> @utofp_v3i32_v3f64(<3 x i32> %a) {
 ; CHECK-SD-NEXT:    ushll2 v0.2d, v0.4s, #0
 ; CHECK-SD-NEXT:    ucvtf v3.2d, v1.2d
 ; CHECK-SD-NEXT:    ucvtf v2.2d, v0.2d
-; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: utofp_v3i32_v3f64:
@@ -1996,6 +2041,7 @@ define <3 x double> @utofp_v3i32_v3f64(<3 x i32> %a) {
 ; CHECK-GI-NEXT:    ushll2 v0.2d, v0.4s, #0
 ; CHECK-GI-NEXT:    ucvtf v3.2d, v1.2d
 ; CHECK-GI-NEXT:    ucvtf v2.2d, v0.2d
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v3.d[1]
 ; CHECK-GI-NEXT:    fmov d0, d3
 ; CHECK-GI-NEXT:    ret
@@ -2387,6 +2433,7 @@ define <2 x fp128> @stofp_v2i16_v2f128(<2 x i16> %a) {
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    sxth w0, w8
@@ -2413,6 +2460,7 @@ define <2 x fp128> @utofp_v2i16_v2f128(<2 x i16> %a) {
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    and w0, w8, #0xffff
@@ -2439,6 +2487,7 @@ define <3 x fp128> @stofp_v3i16_v3f128(<3 x i16> %a) {
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w0, v0.h[0]
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl __floatsitf
@@ -2467,6 +2516,7 @@ define <3 x fp128> @utofp_v3i16_v3f128(<3 x i16> %a) {
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl __floatunsitf
@@ -2538,7 +2588,10 @@ define <3 x double> @stofp_v3i16_v3f64(<3 x i16> %a) {
 ; CHECK-SD-NEXT:    sshll2 v1.2d, v1.4s, #0
 ; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    scvtf v2.2d, v1.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: stofp_v3i16_v3f64:
@@ -2548,7 +2601,9 @@ define <3 x double> @stofp_v3i16_v3f64(<3 x i16> %a) {
 ; CHECK-GI-NEXT:    sshll2 v1.2d, v1.4s, #0
 ; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-GI-NEXT:    scvtf v2.2d, v1.2d
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <3 x i16> %a to <3 x double>
@@ -2563,7 +2618,10 @@ define <3 x double> @utofp_v3i16_v3f64(<3 x i16> %a) {
 ; CHECK-SD-NEXT:    ushll2 v1.2d, v1.4s, #0
 ; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ucvtf v2.2d, v1.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: utofp_v3i16_v3f64:
@@ -2573,7 +2631,9 @@ define <3 x double> @utofp_v3i16_v3f64(<3 x i16> %a) {
 ; CHECK-GI-NEXT:    ushll2 v1.2d, v1.4s, #0
 ; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-GI-NEXT:    ucvtf v2.2d, v1.2d
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <3 x i16> %a to <3 x double>
@@ -3023,6 +3083,7 @@ define <2 x fp128> @stofp_v2i8_v2f128(<2 x i8> %a) {
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    sxtb w0, w8
@@ -3049,6 +3110,7 @@ define <2 x fp128> @utofp_v2i8_v2f128(<2 x i8> %a) {
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    and w0, w8, #0xff
@@ -3237,8 +3299,11 @@ define <3 x double> @stofp_v3i8_v3f64(<3 x i8> %a) {
 ; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
 ; CHECK-SD-NEXT:    scvtf v2.2d, v1.2d
 ; CHECK-SD-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: stofp_v3i8_v3f64:
@@ -3255,10 +3320,12 @@ define <3 x double> @stofp_v3i8_v3f64(<3 x i8> %a) {
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
 ; CHECK-GI-NEXT:    smov x9, v0.h[3]
 ; CHECK-GI-NEXT:    mov v2.d[0], x8
-; CHECK-GI-NEXT:    mov v2.d[1], x9
 ; CHECK-GI-NEXT:    scvtf v0.2d, v1.2d
-; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    mov v2.d[1], x9
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sitofp <3 x i8> %a to <3 x double>
@@ -3278,7 +3345,10 @@ define <3 x double> @utofp_v3i8_v3f64(<3 x i8> %a) {
 ; CHECK-SD-NEXT:    ushll v1.2d, v1.2s, #0
 ; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-SD-NEXT:    ucvtf v2.2d, v1.2d
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: utofp_v3i8_v3f64:
@@ -3295,10 +3365,12 @@ define <3 x double> @utofp_v3i8_v3f64(<3 x i8> %a) {
 ; CHECK-GI-NEXT:    mov v1.d[1], x9
 ; CHECK-GI-NEXT:    umov w9, v0.h[3]
 ; CHECK-GI-NEXT:    mov v2.d[0], x8
-; CHECK-GI-NEXT:    mov v2.d[1], x9
 ; CHECK-GI-NEXT:    ucvtf v0.2d, v1.2d
-; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    mov v2.d[1], x9
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = uitofp <3 x i8> %a to <3 x double>
@@ -3370,6 +3442,7 @@ entry:
 define <8 x double> @stofp_v8i8_v8f64(<8 x i8> %a) {
 ; CHECK-SD-LABEL: stofp_v8i8_v8f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.b[0]
 ; CHECK-SD-NEXT:    umov w9, v0.b[2]
 ; CHECK-SD-NEXT:    umov w11, v0.b[4]
@@ -3426,6 +3499,7 @@ entry:
 define <8 x double> @utofp_v8i8_v8f64(<8 x i8> %a) {
 ; CHECK-SD-LABEL: utofp_v8i8_v8f64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w8, v0.b[0]
 ; CHECK-SD-NEXT:    umov w9, v0.b[2]
 ; CHECK-SD-NEXT:    umov w11, v0.b[4]
@@ -4077,12 +4151,15 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4100,9 +4177,11 @@ define <2 x float> @stofp_v2i128_v2f32(<2 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floattisf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floattisf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -4131,12 +4210,15 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    add sp, sp, #48
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4154,9 +4236,11 @@ define <2 x float> @utofp_v2i128_v2f32(<2 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floatuntisf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floatuntisf
 ; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
@@ -4190,15 +4274,18 @@ define <3 x float> @stofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    mov x0, x22
 ; CHECK-SD-NEXT:    mov x1, x21
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floattisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -4226,13 +4313,16 @@ define <3 x float> @stofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floattisf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floattisf
 ; CHECK-GI-NEXT:    mov x0, x21
 ; CHECK-GI-NEXT:    mov x1, x22
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floattisf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
@@ -4268,15 +4358,18 @@ define <3 x float> @utofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    mov x0, x22
 ; CHECK-SD-NEXT:    mov x1, x21
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    mov x0, x20
 ; CHECK-SD-NEXT:    mov x1, x19
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-NEXT:    bl __floatuntisf
 ; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-SD-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -4304,13 +4397,16 @@ define <3 x float> @utofp_v3i128_v3f32(<3 x i128> %a) {
 ; CHECK-GI-NEXT:    bl __floatuntisf
 ; CHECK-GI-NEXT:    mov x0, x19
 ; CHECK-GI-NEXT:    mov x1, x20
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floatuntisf
 ; CHECK-GI-NEXT:    mov x0, x21
 ; CHECK-GI-NEXT:    mov x1, x22
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-NEXT:    bl __floatuntisf
 ; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-GI-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
@@ -4349,6 +4445,9 @@ entry:
 define <3 x float> @stofp_v3i64_v3f32(<3 x i64> %a) {
 ; CHECK-SD-LABEL: stofp_v3i64_v3f32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    scvtf v1.2d, v2.2d
 ; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
@@ -4358,6 +4457,9 @@ define <3 x float> @stofp_v3i64_v3f32(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: stofp_v3i64_v3f32:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    scvtf v2.2d, v2.2d
 ; CHECK-GI-NEXT:    scvtf v0.2d, v0.2d
@@ -4375,6 +4477,9 @@ entry:
 define <3 x float> @utofp_v3i64_v3f32(<3 x i64> %a) {
 ; CHECK-SD-LABEL: utofp_v3i64_v3f32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ucvtf v1.2d, v2.2d
 ; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
@@ -4384,6 +4489,9 @@ define <3 x float> @utofp_v3i64_v3f32(<3 x i64> %a) {
 ;
 ; CHECK-GI-LABEL: utofp_v3i64_v3f32:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ucvtf v2.2d, v2.2d
 ; CHECK-GI-NEXT:    ucvtf v0.2d, v0.2d
@@ -5533,6 +5641,7 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-SD-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    add sp, sp, #48
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
@@ -5552,12 +5661,15 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    mov x0, x20
 ; CHECK-SD-FP16-NEXT:    mov x1, x19
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    add sp, sp, #48
 ; CHECK-SD-FP16-NEXT:    ret
 ;
@@ -5583,6 +5695,7 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-GI-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    add sp, sp, #48
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
@@ -5600,9 +5713,11 @@ define <2 x half> @stofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-GI-FP16-NEXT:    bl __floattihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x19
 ; CHECK-GI-FP16-NEXT:    mov x1, x20
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floattihf
 ; CHECK-GI-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    mov v1.h[1], v0.h[0]
@@ -5639,6 +5754,7 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-SD-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    add sp, sp, #48
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
@@ -5658,12 +5774,15 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    mov x0, x20
 ; CHECK-SD-FP16-NEXT:    mov x1, x19
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    add sp, sp, #48
 ; CHECK-SD-FP16-NEXT:    ret
 ;
@@ -5689,6 +5808,7 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-GI-NOFP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    add sp, sp, #48
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
@@ -5706,9 +5826,11 @@ define <2 x half> @utofp_v2i128_v2f16(<2 x i128> %a) {
 ; CHECK-GI-FP16-NEXT:    bl __floatuntihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x19
 ; CHECK-GI-FP16-NEXT:    mov x1, x20
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floatuntihf
 ; CHECK-GI-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    mov v1.h[1], v0.h[0]
@@ -5758,6 +5880,7 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    add sp, sp, #64
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
@@ -5782,15 +5905,18 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    mov x0, x22
 ; CHECK-SD-FP16-NEXT:    mov x1, x21
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov x0, x20
 ; CHECK-SD-FP16-NEXT:    mov x1, x19
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floattihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -5833,7 +5959,8 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-GI-NOFP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v0.h[0]
-; CHECK-GI-NOFP16-NEXT:    fmov d0, d1
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    add sp, sp, #80
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
@@ -5856,19 +5983,23 @@ define <3 x half> @stofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-GI-FP16-NEXT:    bl __floattihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x19
 ; CHECK-GI-FP16-NEXT:    mov x1, x20
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floattihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x21
 ; CHECK-GI-FP16-NEXT:    mov x1, x22
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floattihf
 ; CHECK-GI-FP16-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v1.h[2], v0.h[0]
-; CHECK-GI-FP16-NEXT:    fmov d0, d1
+; CHECK-GI-FP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    add sp, sp, #80
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
@@ -5914,6 +6045,7 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-NOFP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    add sp, sp, #64
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
@@ -5938,15 +6070,18 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    mov x0, x22
 ; CHECK-SD-FP16-NEXT:    mov x1, x21
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov x0, x20
 ; CHECK-SD-FP16-NEXT:    mov x1, x19
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-SD-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-SD-FP16-NEXT:    bl __floatuntihf
 ; CHECK-SD-FP16-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-SD-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-SD-FP16-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
 ; CHECK-SD-FP16-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -5989,7 +6124,8 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-GI-NOFP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-NOFP16-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-NOFP16-NEXT:    mov v1.h[2], v0.h[0]
-; CHECK-GI-NOFP16-NEXT:    fmov d0, d1
+; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NOFP16-NEXT:    add sp, sp, #80
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
@@ -6012,19 +6148,23 @@ define <3 x half> @utofp_v3i128_v3f16(<3 x i128> %a) {
 ; CHECK-GI-FP16-NEXT:    bl __floatuntihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x19
 ; CHECK-GI-FP16-NEXT:    mov x1, x20
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floatuntihf
 ; CHECK-GI-FP16-NEXT:    mov x0, x21
 ; CHECK-GI-FP16-NEXT:    mov x1, x22
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-GI-FP16-NEXT:    bl __floatuntihf
 ; CHECK-GI-FP16-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; CHECK-GI-FP16-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-GI-FP16-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
 ; CHECK-GI-FP16-NEXT:    mov v1.h[1], v2.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v1.h[2], v0.h[0]
-; CHECK-GI-FP16-NEXT:    fmov d0, d1
+; CHECK-GI-FP16-NEXT:    mov v0.16b, v1.16b
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    add sp, sp, #80
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
@@ -6042,6 +6182,7 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-SD-NOFP16-NEXT:    fcvt h2, s0
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s1
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: stofp_v2i64_v2f16:
@@ -6051,6 +6192,7 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-SD-FP16-NEXT:    scvtf h0, x9
 ; CHECK-SD-FP16-NEXT:    scvtf h1, x8
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: stofp_v2i64_v2f16:
@@ -6069,6 +6211,7 @@ define <2 x half> @stofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h0, d0
 ; CHECK-GI-FP16-NEXT:    fcvt h1, d1
 ; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = sitofp <2 x i64> %a to <2 x half>
@@ -6085,6 +6228,7 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-SD-NOFP16-NEXT:    fcvt h2, s0
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s1
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: utofp_v2i64_v2f16:
@@ -6094,6 +6238,7 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-SD-FP16-NEXT:    ucvtf h0, x9
 ; CHECK-SD-FP16-NEXT:    ucvtf h1, x8
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: utofp_v2i64_v2f16:
@@ -6112,6 +6257,7 @@ define <2 x half> @utofp_v2i64_v2f16(<2 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h0, d0
 ; CHECK-GI-FP16-NEXT:    fcvt h1, d1
 ; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = uitofp <2 x i64> %a to <2 x half>
@@ -6121,6 +6267,9 @@ entry:
 define <3 x half> @stofp_v3i64_v3f16(<3 x i64> %a) {
 ; CHECK-SD-LABEL: stofp_v3i64_v3f16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    scvtf v1.2d, v2.2d
 ; CHECK-SD-NEXT:    scvtf v0.2d, v0.2d
@@ -6131,6 +6280,9 @@ define <3 x half> @stofp_v3i64_v3f16(<3 x i64> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: stofp_v3i64_v3f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NOFP16-NEXT:    scvtf v1.2d, v2.2d
 ; CHECK-GI-NOFP16-NEXT:    scvtf v0.2d, v0.2d
@@ -6141,6 +6293,9 @@ define <3 x half> @stofp_v3i64_v3f16(<3 x i64> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: stofp_v3i64_v3f16:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-FP16-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-FP16-NEXT:    scvtf v2.2d, v2.2d
 ; CHECK-GI-FP16-NEXT:    scvtf v0.2d, v0.2d
@@ -6150,6 +6305,7 @@ define <3 x half> @stofp_v3i64_v3f16(<3 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h1, d1
 ; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = sitofp <3 x i64> %a to <3 x half>
@@ -6159,6 +6315,9 @@ entry:
 define <3 x half> @utofp_v3i64_v3f16(<3 x i64> %a) {
 ; CHECK-SD-LABEL: utofp_v3i64_v3f16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ucvtf v1.2d, v2.2d
 ; CHECK-SD-NEXT:    ucvtf v0.2d, v0.2d
@@ -6169,6 +6328,9 @@ define <3 x half> @utofp_v3i64_v3f16(<3 x i64> %a) {
 ;
 ; CHECK-GI-NOFP16-LABEL: utofp_v3i64_v3f16:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-NOFP16-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NOFP16-NEXT:    ucvtf v1.2d, v2.2d
 ; CHECK-GI-NOFP16-NEXT:    ucvtf v0.2d, v0.2d
@@ -6179,6 +6341,9 @@ define <3 x half> @utofp_v3i64_v3f16(<3 x i64> %a) {
 ;
 ; CHECK-GI-FP16-LABEL: utofp_v3i64_v3f16:
 ; CHECK-GI-FP16:       // %bb.0: // %entry
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-FP16-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-GI-FP16-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-FP16-NEXT:    ucvtf v2.2d, v2.2d
 ; CHECK-GI-FP16-NEXT:    ucvtf v0.2d, v0.2d
@@ -6188,6 +6353,7 @@ define <3 x half> @utofp_v3i64_v3f16(<3 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h1, d1
 ; CHECK-GI-FP16-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v0.h[2], v2.h[0]
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = uitofp <3 x i64> %a to <3 x half>
@@ -6226,6 +6392,7 @@ define <4 x half> @stofp_v4i64_v4f16(<4 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h2, d3
 ; CHECK-GI-FP16-NEXT:    mov v0.h[2], v1.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v0.h[3], v2.h[0]
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = sitofp <4 x i64> %a to <4 x half>
@@ -6264,6 +6431,7 @@ define <4 x half> @utofp_v4i64_v4f16(<4 x i64> %a) {
 ; CHECK-GI-FP16-NEXT:    fcvt h2, d3
 ; CHECK-GI-FP16-NEXT:    mov v0.h[2], v1.h[0]
 ; CHECK-GI-FP16-NEXT:    mov v0.h[3], v2.h[0]
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-FP16-NEXT:    ret
 entry:
   %c = uitofp <4 x i64> %a to <4 x half>
@@ -7005,6 +7173,7 @@ entry:
 define <2 x half> @stofp_v2i32_v2f16(<2 x i32> %a) {
 ; CHECK-SD-LABEL: stofp_v2i32_v2f16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-SD-NEXT:    ret
@@ -7024,6 +7193,7 @@ entry:
 define <2 x half> @utofp_v2i32_v2f16(<2 x i32> %a) {
 ; CHECK-SD-LABEL: utofp_v2i32_v2f16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-SD-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-SD-NEXT:    ret
@@ -7729,6 +7899,7 @@ entry:
 define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-NOFP16-LABEL: stofp_v2i8_v2f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-NOFP16-NEXT:    fmov w9, s0
 ; CHECK-SD-NOFP16-NEXT:    sxtb w9, w9
@@ -7738,10 +7909,12 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-NOFP16-NEXT:    fcvt h2, s0
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s1
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: stofp_v2i8_v2f16:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-FP16-NEXT:    fmov w9, s0
 ; CHECK-SD-FP16-NEXT:    sxtb w9, w9
@@ -7749,6 +7922,7 @@ define <2 x half> @stofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-FP16-NEXT:    scvtf h0, w9
 ; CHECK-SD-FP16-NEXT:    scvtf h1, w8
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: stofp_v2i8_v2f16:
@@ -7776,6 +7950,7 @@ entry:
 define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-NOFP16-LABEL: utofp_v2i8_v2f16:
 ; CHECK-SD-NOFP16:       // %bb.0: // %entry
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-NOFP16-NEXT:    fmov w9, s0
 ; CHECK-SD-NOFP16-NEXT:    and w9, w9, #0xff
@@ -7785,10 +7960,12 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-NOFP16-NEXT:    fcvt h2, s0
 ; CHECK-SD-NOFP16-NEXT:    fcvt h0, s1
 ; CHECK-SD-NOFP16-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NOFP16-NEXT:    ret
 ;
 ; CHECK-SD-FP16-LABEL: utofp_v2i8_v2f16:
 ; CHECK-SD-FP16:       // %bb.0: // %entry
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-FP16-NEXT:    fmov w9, s0
 ; CHECK-SD-FP16-NEXT:    and w9, w9, #0xff
@@ -7796,6 +7973,7 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
 ; CHECK-SD-FP16-NEXT:    ucvtf h0, w9
 ; CHECK-SD-FP16-NEXT:    ucvtf h1, w8
 ; CHECK-SD-FP16-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-FP16-NEXT:    ret
 ;
 ; CHECK-GI-NOFP16-LABEL: utofp_v2i8_v2f16:

diff --git a/llvm/test/CodeGen/AArch64/ldexp.ll b/llvm/test/CodeGen/AArch64/ldexp.ll
index 0a3a85ab181866..6019fa1490e3dc 100644
--- a/llvm/test/CodeGen/AArch64/ldexp.ll
+++ b/llvm/test/CodeGen/AArch64/ldexp.ll
@@ -6,10 +6,13 @@
 define double @testExp(double %val, i32 %a) {
 ; SVE-LABEL: testExp:
 ; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; SVE-NEXT:    sxtw x8, w0
 ; SVE-NEXT:    ptrue p0.d
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT:    fmov d1, x8
 ; SVE-NEXT:    fscale z0.d, p0/m, z0.d, z1.d
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; WINDOWS-LABEL: testExp:
@@ -25,10 +28,13 @@ declare double @ldexp(double, i32) memory(none)
 define double @testExpIntrinsic(double %val, i32 %a) {
 ; SVE-LABEL: testExpIntrinsic:
 ; SVE:       // %bb.0: // %entry
+; SVE-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; SVE-NEXT:    sxtw x8, w0
 ; SVE-NEXT:    ptrue p0.d
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT:    fmov d1, x8
 ; SVE-NEXT:    fscale z0.d, p0/m, z0.d, z1.d
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; WINDOWS-LABEL: testExpIntrinsic:
@@ -44,7 +50,9 @@ define float @testExpf(float %val, i32 %a) {
 ; SVELINUX:       // %bb.0: // %entry
 ; SVELINUX-NEXT:    fmov s1, w0
 ; SVELINUX-NEXT:    ptrue p0.s
+; SVELINUX-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; SVELINUX-NEXT:    fscale z0.s, p0/m, z0.s, z1.s
+; SVELINUX-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; SVELINUX-NEXT:    ret
 ;
 ; SVEWINDOWS-LABEL: testExpf:
@@ -64,7 +72,9 @@ define float @testExpfIntrinsic(float %val, i32 %a) {
 ; SVE:       // %bb.0: // %entry
 ; SVE-NEXT:    fmov s1, w0
 ; SVE-NEXT:    ptrue p0.s
+; SVE-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; SVE-NEXT:    fscale z0.s, p0/m, z0.s, z1.s
+; SVE-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; WINDOWS-LABEL: testExpfIntrinsic:

diff --git a/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
index 2776bd68639f3c..7e28c863b07a9a 100644
--- a/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llrint-conv-fp16.ll
@@ -10,12 +10,14 @@ define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    frintx s0, s0
 ; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhhs:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    frintx h0, h0
 ; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)
@@ -29,12 +31,14 @@ define i32 @testmhws(half %x) {
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    frintx s0, s0
 ; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhws:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    frintx h0, h0
 ; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.i64.f16(half %x)

diff --git a/llvm/test/CodeGen/AArch64/llrint-conv.ll b/llvm/test/CodeGen/AArch64/llrint-conv.ll
index 6ca211917ab9e1..3a6396d120f791 100644
--- a/llvm/test/CodeGen/AArch64/llrint-conv.ll
+++ b/llvm/test/CodeGen/AArch64/llrint-conv.ll
@@ -7,6 +7,7 @@ define i32 @testmsws(float %x) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    frintx s0, s0
 ; CHECK-NEXT:    fcvtzs x0, s0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.f32(float %x)
@@ -30,6 +31,7 @@ define i32 @testmswd(double %x) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    frintx d0, d0
 ; CHECK-NEXT:    fcvtzs x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llrint.f64(double %x)
@@ -55,6 +57,7 @@ define i32 @testmswl(fp128 %x) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl llrintl
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
index d6adcc3f0ebb9a..4bf65e7d6fd088 100644
--- a/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/llround-conv-fp16.ll
@@ -7,11 +7,13 @@ define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16:       // %bb.0: // %entry
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhhs:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llround.i64.f16(half %x)
@@ -24,11 +26,13 @@ define i32 @testmhws(half %x) {
 ; CHECK-NOFP16:       // %bb.0: // %entry
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhws:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.llround.i64.f16(half %x)

diff --git a/llvm/test/CodeGen/AArch64/llvm.exp10.ll b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
index 93eed72178cb31..c1ea891bc86e7e 100644
--- a/llvm/test/CodeGen/AArch64/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.exp10.ll
@@ -57,6 +57,7 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) {
 ; SDAG-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; SDAG-NEXT:    .cfi_def_cfa_offset 48
 ; SDAG-NEXT:    .cfi_offset w30, -16
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    mov h1, v0.h[1]
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    fcvt s0, h1
@@ -87,6 +88,7 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) {
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v0.h[3], v1.h[0]
+; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    add sp, sp, #48
 ; SDAG-NEXT:    ret
 ;
@@ -98,6 +100,7 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) {
 ; GISEL-NEXT:    .cfi_def_cfa_offset 32
 ; GISEL-NEXT:    .cfi_offset w30, -8
 ; GISEL-NEXT:    .cfi_offset b8, -16
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov h8, v0.h[1]
 ; GISEL-NEXT:    fcvt s0, h0
 ; GISEL-NEXT:    bl exp10f
@@ -111,6 +114,7 @@ define <2 x half> @exp10_v2f16(<2 x half> %x) {
 ; GISEL-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; GISEL-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; GISEL-NEXT:    mov v0.h[1], v1.h[0]
+; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    add sp, sp, #32
 ; GISEL-NEXT:    ret
   %r = call <2 x half> @llvm.exp10.v2f16(<2 x half> %x)
@@ -124,6 +128,7 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
 ; SDAG-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; SDAG-NEXT:    .cfi_def_cfa_offset 48
 ; SDAG-NEXT:    .cfi_offset w30, -16
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    mov h1, v0.h[1]
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    fcvt s0, h1
@@ -154,6 +159,7 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v0.h[3], v1.h[0]
+; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    add sp, sp, #48
 ; SDAG-NEXT:    ret
 ;
@@ -166,6 +172,7 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
 ; GISEL-NEXT:    .cfi_offset w30, -16
 ; GISEL-NEXT:    .cfi_offset b8, -24
 ; GISEL-NEXT:    .cfi_offset b9, -32
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov h8, v0.h[1]
 ; GISEL-NEXT:    mov h9, v0.h[2]
 ; GISEL-NEXT:    fcvt s0, h0
@@ -186,7 +193,8 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) {
 ; GISEL-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; GISEL-NEXT:    mov v1.h[1], v2.h[0]
 ; GISEL-NEXT:    mov v1.h[2], v0.h[0]
-; GISEL-NEXT:    fmov d0, d1
+; GISEL-NEXT:    mov v0.16b, v1.16b
+; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    add sp, sp, #64
 ; GISEL-NEXT:    ret
   %r = call <3 x half> @llvm.exp10.v3f16(<3 x half> %x)
@@ -200,6 +208,7 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
 ; SDAG-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; SDAG-NEXT:    .cfi_def_cfa_offset 48
 ; SDAG-NEXT:    .cfi_offset w30, -16
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    mov h1, v0.h[1]
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    fcvt s0, h1
@@ -230,6 +239,7 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v0.h[3], v1.h[0]
+; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    add sp, sp, #48
 ; SDAG-NEXT:    ret
 ;
@@ -244,6 +254,7 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
 ; GISEL-NEXT:    .cfi_offset b8, -16
 ; GISEL-NEXT:    .cfi_offset b9, -24
 ; GISEL-NEXT:    .cfi_offset b10, -32
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov h8, v0.h[1]
 ; GISEL-NEXT:    mov h9, v0.h[2]
 ; GISEL-NEXT:    mov h10, v0.h[3]
@@ -273,7 +284,8 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) {
 ; GISEL-NEXT:    mov v1.h[1], v3.h[0]
 ; GISEL-NEXT:    mov v1.h[2], v2.h[0]
 ; GISEL-NEXT:    mov v1.h[3], v0.h[0]
-; GISEL-NEXT:    fmov d0, d1
+; GISEL-NEXT:    mov v0.16b, v1.16b
+; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    add sp, sp, #80
 ; GISEL-NEXT:    ret
   %r = call <4 x half> @llvm.exp10.v4f16(<4 x half> %x)
@@ -289,14 +301,28 @@ define float @exp10_f32(float %x) {
 }
 
 define <1 x float> @exp10_v1f32(<1 x float> %x) {
-; CHECK-LABEL: exp10_v1f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    bl exp10f
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    ret
+; SDAG-LABEL: exp10_v1f32:
+; SDAG:       // %bb.0:
+; SDAG-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; SDAG-NEXT:    .cfi_def_cfa_offset 16
+; SDAG-NEXT:    .cfi_offset w30, -16
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
+; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; SDAG-NEXT:    bl exp10f
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $d0
+; SDAG-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; SDAG-NEXT:    ret
+;
+; GISEL-LABEL: exp10_v1f32:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; GISEL-NEXT:    .cfi_def_cfa_offset 16
+; GISEL-NEXT:    .cfi_offset w30, -16
+; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; GISEL-NEXT:    bl exp10f
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $d0
+; GISEL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISEL-NEXT:    ret
   %r = call <1 x float> @llvm.exp10.v1f32(<1 x float> %x)
   ret <1 x float> %r
 }
@@ -308,15 +334,20 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) {
 ; SDAG-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; SDAG-NEXT:    .cfi_def_cfa_offset 48
 ; SDAG-NEXT:    .cfi_offset w30, -16
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov s0, v0.s[1]
 ; SDAG-NEXT:    bl exp10f
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v0.s[1], v1.s[0]
+; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    add sp, sp, #48
 ; SDAG-NEXT:    ret
 ;
@@ -328,12 +359,16 @@ define <2 x float> @exp10_v2f32(<2 x float> %x) {
 ; GISEL-NEXT:    .cfi_def_cfa_offset 32
 ; GISEL-NEXT:    .cfi_offset w30, -8
 ; GISEL-NEXT:    .cfi_offset b8, -16
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov s8, v0.s[1]
+; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; GISEL-NEXT:    bl exp10f
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s8
 ; GISEL-NEXT:    bl exp10f
 ; GISEL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; GISEL-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; GISEL-NEXT:    mov v1.s[1], v0.s[0]
@@ -354,16 +389,20 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov s0, v0.s[1]
 ; SDAG-NEXT:    bl exp10f
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    mov v0.s[1], v1.s[0]
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; SDAG-NEXT:    mov s0, v0.s[2]
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v1.s[2], v0.s[0]
 ; SDAG-NEXT:    mov v0.16b, v1.16b
@@ -381,14 +420,18 @@ define <3 x float> @exp10_v3f32(<3 x float> %x) {
 ; GISEL-NEXT:    .cfi_offset b9, -32
 ; GISEL-NEXT:    mov s8, v0.s[1]
 ; GISEL-NEXT:    mov s9, v0.s[2]
+; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; GISEL-NEXT:    bl exp10f
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s8
 ; GISEL-NEXT:    bl exp10f
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s9
 ; GISEL-NEXT:    bl exp10f
 ; GISEL-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; GISEL-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
 ; GISEL-NEXT:    mov v1.s[1], v2.s[0]
@@ -410,22 +453,27 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov s0, v0.s[1]
 ; SDAG-NEXT:    bl exp10f
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    mov v0.s[1], v1.s[0]
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; SDAG-NEXT:    mov s0, v0.s[2]
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    mov v1.s[2], v0.s[0]
 ; SDAG-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; SDAG-NEXT:    mov s0, v0.s[3]
 ; SDAG-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    bl exp10f
 ; SDAG-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v1.s[3], v0.s[0]
 ; SDAG-NEXT:    mov v0.16b, v1.16b
@@ -446,17 +494,22 @@ define <4 x float> @exp10_v4f32(<4 x float> %x) {
 ; GISEL-NEXT:    mov s8, v0.s[1]
 ; GISEL-NEXT:    mov s9, v0.s[2]
 ; GISEL-NEXT:    mov s10, v0.s[3]
+; GISEL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; GISEL-NEXT:    bl exp10f
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s8
 ; GISEL-NEXT:    bl exp10f
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s9
 ; GISEL-NEXT:    bl exp10f
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov s0, s10
 ; GISEL-NEXT:    bl exp10f
 ; GISEL-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
+; GISEL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; GISEL-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
 ; GISEL-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
 ; GISEL-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
@@ -502,10 +555,13 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) {
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    mov d0, v0.d[1]
 ; SDAG-NEXT:    bl exp10
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    bl exp10
 ; SDAG-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; SDAG-NEXT:    mov v0.d[1], v1.d[0]
 ; SDAG-NEXT:    add sp, sp, #48
@@ -520,11 +576,14 @@ define <2 x double> @exp10_v2f64(<2 x double> %x) {
 ; GISEL-NEXT:    .cfi_offset w30, -8
 ; GISEL-NEXT:    .cfi_offset b8, -16
 ; GISEL-NEXT:    mov d8, v0.d[1]
+; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    bl exp10
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov d0, d8
 ; GISEL-NEXT:    bl exp10
 ; GISEL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
 ; GISEL-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
 ; GISEL-NEXT:    mov v1.d[1], v0.d[0]
@@ -604,17 +663,22 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
 ; SDAG-NEXT:    mov d0, v0.d[1]
 ; SDAG-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; SDAG-NEXT:    bl exp10
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    bl exp10
 ; SDAG-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    mov v0.d[1], v1.d[0]
 ; SDAG-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; SDAG-NEXT:    mov d0, v0.d[1]
 ; SDAG-NEXT:    bl exp10
+; SDAG-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; SDAG-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; SDAG-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; SDAG-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; SDAG-NEXT:    bl exp10
 ; SDAG-NEXT:    fmov d1, d0
 ; SDAG-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
@@ -635,23 +699,28 @@ define <4 x double> @exp10_v4f64(<4 x double> %x) {
 ; GISEL-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    mov d8, v0.d[1]
 ; GISEL-NEXT:    mov d9, v1.d[1]
+; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    bl exp10
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov d0, d8
 ; GISEL-NEXT:    bl exp10
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; GISEL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; GISEL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; GISEL-NEXT:    bl exp10
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; GISEL-NEXT:    fmov d0, d9
 ; GISEL-NEXT:    bl exp10
-; GISEL-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
-; GISEL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; GISEL-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; GISEL-NEXT:    fmov d2, d1
-; GISEL-NEXT:    ldp q1, q3, [sp] // 32-byte Folded Reload
+; GISEL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; GISEL-NEXT:    mov v2.d[1], v1.d[0]
+; GISEL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; GISEL-NEXT:    mov v1.d[1], v0.d[0]
-; GISEL-NEXT:    mov v2.d[1], v3.d[0]
 ; GISEL-NEXT:    mov v0.16b, v2.16b
 ; GISEL-NEXT:    add sp, sp, #80
 ; GISEL-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
index 98c50d772f9dee..e4cb8ed6eaf90f 100644
--- a/llvm/test/CodeGen/AArch64/llvm.frexp.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
@@ -9,6 +9,7 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h1, v0.h[1]
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    add x0, sp, #36
@@ -45,7 +46,9 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ld1 { v1.s }[1], [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT:    mov v0.h[3], v2.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
   %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
@@ -67,11 +70,14 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f16_v3i32(<3 x float> %a) {
 ; CHECK-NEXT:    add x0, sp, #56
 ; CHECK-NEXT:    add x19, sp, #56
 ; CHECK-NEXT:    bl frexpf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    add x0, sp, #60
 ; CHECK-NEXT:    add x20, sp, #60
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
@@ -81,6 +87,7 @@ define { <3 x float>, <3 x i32> } @test_frexp_v3f16_v3i32(<3 x float> %a) {
 ; CHECK-NEXT:    bl frexpf
 ; CHECK-NEXT:    ldr s1, [sp, #44]
 ; CHECK-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    ld1 { v1.s }[1], [x19]
 ; CHECK-NEXT:    mov v2.s[2], v0.s[0]

diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
index 8e70d79fa61eff..c5efc796e7a3c4 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -109,6 +109,7 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 64
 ; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h1, v0.h[1]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    add x0, sp, #36
@@ -151,6 +152,8 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
 ; CHECK-NEXT:    mov v1.h[2], v4.h[0]
 ; CHECK-NEXT:    mov v0.h[3], v2.h[0]
 ; CHECK-NEXT:    mov v1.h[3], v3.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
 ;
@@ -166,6 +169,7 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
 ; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
 ; NO-LIBCALL-NEXT:    .cfi_offset b10, -40
 ; NO-LIBCALL-NEXT:    .cfi_offset b11, -48
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-LIBCALL-NEXT:    mov h1, v0.h[1]
 ; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fcvt s8, h1
@@ -220,10 +224,13 @@ define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
 ; NO-LIBCALL-NEXT:    fmov s1, s0
 ; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; NO-LIBCALL-NEXT:    fcvt h2, s1
-; NO-LIBCALL-NEXT:    ldp q1, q0, [sp] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    mov v1.h[3], v2.h[0]
+; NO-LIBCALL-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; NO-LIBCALL-NEXT:    add sp, sp, #80
 ; NO-LIBCALL-NEXT:    ret
   %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
@@ -281,6 +288,7 @@ define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) {
 ; CHECK-NEXT:    add x0, sp, #20
 ; CHECK-NEXT:    add x1, sp, #16
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x0, sp, #28
@@ -320,10 +328,13 @@ define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) {
 ; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s8
 ; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl sinf
 ; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -331,14 +342,18 @@ define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) {
 ; NO-LIBCALL-NEXT:    fmov s0, s9
 ; NO-LIBCALL-NEXT:    bl sinf
 ; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v1.s[2], v0.s[0]
 ; NO-LIBCALL-NEXT:    fmov s0, s8
 ; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl cosf
 ; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
 ; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s9
@@ -364,9 +379,11 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    add x0, sp, #44
 ; CHECK-NEXT:    add x1, sp, #40
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x0, sp, #28
@@ -380,6 +397,8 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
 ; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
 ; CHECK-NEXT:    ld1 { v1.s }[1], [x20]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-NEXT:    add sp, sp, #64
 ; CHECK-NEXT:    ret
 ;
@@ -391,26 +410,34 @@ define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
 ; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 64
 ; NO-LIBCALL-NEXT:    .cfi_offset w30, -8
 ; NO-LIBCALL-NEXT:    .cfi_offset b8, -16
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-LIBCALL-NEXT:    mov s8, v0.s[1]
 ; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s8
 ; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl sinf
 ; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s8
 ; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl cosf
 ; NO-LIBCALL-NEXT:    fmov s1, s0
 ; NO-LIBCALL-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
 ; NO-LIBCALL-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
 ; NO-LIBCALL-NEXT:    ldr d8, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; NO-LIBCALL-NEXT:    mov v1.s[1], v2.s[0]
+; NO-LIBCALL-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; NO-LIBCALL-NEXT:    add sp, sp, #64
 ; NO-LIBCALL-NEXT:    ret
   %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
@@ -468,6 +495,7 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
 ; CHECK-NEXT:    add x0, sp, #56
 ; CHECK-NEXT:    add x1, sp, #40
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl sincos
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x0, sp, #32
@@ -497,16 +525,21 @@ define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
 ; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov d0, d8
 ; NO-LIBCALL-NEXT:    bl sin
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; NO-LIBCALL-NEXT:    bl sin
 ; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.d[1], v1.d[0]
 ; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov d0, d8
 ; NO-LIBCALL-NEXT:    bl cos
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; NO-LIBCALL-NEXT:    bl cos
 ; NO-LIBCALL-NEXT:    fmov d1, d0
 ; NO-LIBCALL-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload

diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll
index 2c9811dbcf992f..3fa5d64a210e19 100644
--- a/llvm/test/CodeGen/AArch64/load.ll
+++ b/llvm/test/CodeGen/AArch64/load.ll
@@ -233,6 +233,7 @@ define <2 x i8> @load_v2i8(ptr %ptr, <2 x i8> %b) {
 ; CHECK-SD-NEXT:    ld1 { v0.b }[0], [x0]
 ; CHECK-SD-NEXT:    add x8, x0, #1
 ; CHECK-SD-NEXT:    ld1 { v0.b }[4], [x8]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: load_v2i8:
@@ -240,6 +241,7 @@ define <2 x i8> @load_v2i8(ptr %ptr, <2 x i8> %b) {
 ; CHECK-GI-NEXT:    ld1 { v0.b }[0], [x0]
 ; CHECK-GI-NEXT:    ldr b1, [x0, #1]
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %a = load <2 x i8>, ptr %ptr
   ret <2 x i8> %a
@@ -270,6 +272,7 @@ define <2 x i16> @load_v2i16(ptr %ptr) {
 ; CHECK-SD-NEXT:    ld1 { v0.h }[0], [x0]
 ; CHECK-SD-NEXT:    add x8, x0, #2
 ; CHECK-SD-NEXT:    ld1 { v0.h }[2], [x8]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: load_v2i16:
@@ -277,6 +280,7 @@ define <2 x i16> @load_v2i16(ptr %ptr) {
 ; CHECK-GI-NEXT:    ld1 { v0.h }[0], [x0]
 ; CHECK-GI-NEXT:    ldr h1, [x0, #2]
 ; CHECK-GI-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %a = load <2 x i16>, ptr %ptr
   ret <2 x i16> %a
@@ -362,6 +366,7 @@ define <7 x i8> @load_v7i8(ptr %ptr) {
 ; CHECK-GI-NEXT:    mov v0.b[5], v1.b[0]
 ; CHECK-GI-NEXT:    ldr b1, [x0, #6]
 ; CHECK-GI-NEXT:    mov v0.b[6], v1.b[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %a = load <7 x i8>, ptr %ptr
   ret <7 x i8> %a
@@ -380,6 +385,7 @@ define <3 x i16> @load_v3i16(ptr %ptr) {
 ; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
 ; CHECK-GI-NEXT:    add x8, x0, #4
 ; CHECK-GI-NEXT:    ld1 { v0.h }[2], [x8]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %a = load <3 x i16>, ptr %ptr
   ret <3 x i16> %a

diff --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll
index 0a62f8076a1be0..31047954401cf5 100644
--- a/llvm/test/CodeGen/AArch64/logic-shift.ll
+++ b/llvm/test/CodeGen/AArch64/logic-shift.ll
@@ -5,6 +5,7 @@ define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: or_lshr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, w1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
 ; CHECK-NEXT:    orr w0, w8, w3
@@ -64,6 +65,7 @@ define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: or_ashr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, w1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
 ; CHECK-NEXT:    orr w0, w8, w3
@@ -137,6 +139,7 @@ define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: or_shl_commute1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    orr w8, w0, w1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    orr w0, w8, w3
 ; CHECK-NEXT:    ret
@@ -230,6 +233,7 @@ define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: xor_lshr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w0, w1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
 ; CHECK-NEXT:    eor w0, w8, w3
@@ -289,6 +293,7 @@ define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: xor_ashr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w0, w1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
 ; CHECK-NEXT:    eor w0, w8, w3
@@ -362,6 +367,7 @@ define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: xor_shl_commute1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w0, w1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    eor w0, w8, w3
 ; CHECK-NEXT:    ret
@@ -455,6 +461,7 @@ define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: and_lshr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and w8, w8, #0xff
 ; CHECK-NEXT:    lsr w8, w8, w2
 ; CHECK-NEXT:    and w0, w8, w3
@@ -514,6 +521,7 @@ define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
 ; CHECK-LABEL: and_ashr_commute0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    sxth w8, w8
 ; CHECK-NEXT:    asr w8, w8, w2
 ; CHECK-NEXT:    and w0, w8, w3
@@ -587,6 +595,7 @@ define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
 ; CHECK-LABEL: and_shl_commute1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, w1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    lsl w8, w8, w2
 ; CHECK-NEXT:    and w0, w8, w3
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
index 69bb6c3d0a0978..6771a862c386fd 100644
--- a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll
@@ -10,12 +10,14 @@ define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    frintx s0, s0
 ; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhhs:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    frintx h0, h0
 ; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)
@@ -29,12 +31,14 @@ define i32 @testmhws(half %x) {
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    frintx s0, s0
 ; CHECK-NOFP16-NEXT:    fcvtzs x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhws:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    frintx h0, h0
 ; CHECK-FP16-NEXT:    fcvtzs x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f16(half %x)

diff --git a/llvm/test/CodeGen/AArch64/lrint-conv.ll b/llvm/test/CodeGen/AArch64/lrint-conv.ll
index e37556032732de..60393b4f18d9d0 100644
--- a/llvm/test/CodeGen/AArch64/lrint-conv.ll
+++ b/llvm/test/CodeGen/AArch64/lrint-conv.ll
@@ -7,6 +7,7 @@ define i32 @testmsws(float %x) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    frintx s0, s0
 ; CHECK-NEXT:    fcvtzs x0, s0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
@@ -30,6 +31,7 @@ define i32 @testmswd(double %x) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    frintx d0, d0
 ; CHECK-NEXT:    fcvtzs x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
@@ -55,6 +57,7 @@ define dso_local i32 @testmswl(fp128 %x) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl lrintl
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
index 980d420e90c48d..bf78fd456eac0e 100644
--- a/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/lround-conv-fp16.ll
@@ -7,11 +7,13 @@ define i16 @testmhhs(half %x) {
 ; CHECK-NOFP16:       // %bb.0: // %entry
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhhs:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lround.i64.f16(half %x)
@@ -24,11 +26,13 @@ define i32 @testmhws(half %x) {
 ; CHECK-NOFP16:       // %bb.0: // %entry
 ; CHECK-NOFP16-NEXT:    fcvt s0, h0
 ; CHECK-NOFP16-NEXT:    fcvtas x0, s0
+; CHECK-NOFP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOFP16-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: testmhws:
 ; CHECK-FP16:       // %bb.0: // %entry
 ; CHECK-FP16-NEXT:    fcvtas x0, h0
+; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.lround.i64.f16(half %x)

diff --git a/llvm/test/CodeGen/AArch64/lslfast.ll b/llvm/test/CodeGen/AArch64/lslfast.ll
index 391ecda2c8c7de..5ec70b5f229754 100644
--- a/llvm/test/CodeGen/AArch64/lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/lslfast.ll
@@ -60,6 +60,7 @@ entry:
 define i64 @testsext3(i32 noundef %x, i64 noundef %y, i64 noundef %z) {
 ; CHECK-LABEL: testsext3:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfiz x8, x0, #3, #32
 ; CHECK-NEXT:    add x9, x8, x1
 ; CHECK-NEXT:    add x8, x8, x2
@@ -77,6 +78,7 @@ entry:
 define i64 @testzext3(i32 noundef %x, i64 noundef %y, i64 noundef %z) {
 ; CHECK-LABEL: testzext3:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x8, x0, #3, #32
 ; CHECK-NEXT:    add x9, x8, x1
 ; CHECK-NEXT:    add x8, x8, x2

diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
index 57f05a6832a4f5..4c8e589391c3ac 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll
@@ -4,6 +4,7 @@
 define void @fma_dup_f16(ptr noalias nocapture noundef readonly %A, half noundef %B, ptr noalias nocapture noundef %C, i32 noundef %n) {
 ; CHECK-LABEL: fma_dup_f16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    cbz w2, .LBB0_8
 ; CHECK-NEXT:  // %bb.1: // %for.body.preheader
 ; CHECK-NEXT:    cmp w2, #15

diff --git a/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll b/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
index 7452a7b96d327f..f6bbdf5d95d870 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll
@@ -4,6 +4,7 @@
 define void @foo(i32 noundef %limit, ptr %out, ptr %y) {
 ; CHECK-LABEL: foo:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmp w0, #1
 ; CHECK-NEXT:    b.lt .LBB0_10
 ; CHECK-NEXT:  // %bb.1: // %for.cond1.preheader.us.preheader

diff --git a/llvm/test/CodeGen/AArch64/memset-inline.ll b/llvm/test/CodeGen/AArch64/memset-inline.ll
index d2ca291ba0c0ca..02d852b5ce45ad 100644
--- a/llvm/test/CodeGen/AArch64/memset-inline.ll
+++ b/llvm/test/CodeGen/AArch64/memset-inline.ll
@@ -29,7 +29,7 @@ define void @memset_2(ptr %a, i8 %value) nounwind {
 define void @memset_4(ptr %a, i8 %value) nounwind {
 ; ALL-LABEL: memset_4:
 ; ALL:       // %bb.0:
-; ALL-NEXT:    mov w8, #16843009 // =0x1010101
+; ALL-NEXT:    mov w8, #16843009
 ; ALL-NEXT:    and w9, w1, #0xff
 ; ALL-NEXT:    mul w8, w9, w8
 ; ALL-NEXT:    str w8, [x0]
@@ -41,7 +41,8 @@ define void @memset_4(ptr %a, i8 %value) nounwind {
 define void @memset_8(ptr %a, i8 %value) nounwind {
 ; ALL-LABEL: memset_8:
 ; ALL:       // %bb.0:
-; ALL-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; ALL-NEXT:    // kill: def $w1 killed $w1 def $x1
+; ALL-NEXT:    mov x8, #72340172838076673
 ; ALL-NEXT:    and x9, x1, #0xff
 ; ALL-NEXT:    mul x8, x9, x8
 ; ALL-NEXT:    str x8, [x0]
@@ -53,7 +54,8 @@ define void @memset_8(ptr %a, i8 %value) nounwind {
 define void @memset_16(ptr %a, i8 %value) nounwind {
 ; ALL-LABEL: memset_16:
 ; ALL:       // %bb.0:
-; ALL-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; ALL-NEXT:    // kill: def $w1 killed $w1 def $x1
+; ALL-NEXT:    mov x8, #72340172838076673
 ; ALL-NEXT:    and x9, x1, #0xff
 ; ALL-NEXT:    mul x8, x9, x8
 ; ALL-NEXT:    stp x8, x8, [x0]
@@ -65,7 +67,8 @@ define void @memset_16(ptr %a, i8 %value) nounwind {
 define void @memset_32(ptr %a, i8 %value) nounwind {
 ; GPR-LABEL: memset_32:
 ; GPR:       // %bb.0:
-; GPR-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GPR-NEXT:    mov x8, #72340172838076673
 ; GPR-NEXT:    and x9, x1, #0xff
 ; GPR-NEXT:    mul x8, x9, x8
 ; GPR-NEXT:    stp x8, x8, [x0, #16]
@@ -84,7 +87,8 @@ define void @memset_32(ptr %a, i8 %value) nounwind {
 define void @memset_64(ptr %a, i8 %value) nounwind {
 ; GPR-LABEL: memset_64:
 ; GPR:       // %bb.0:
-; GPR-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GPR-NEXT:    mov x8, #72340172838076673
 ; GPR-NEXT:    and x9, x1, #0xff
 ; GPR-NEXT:    mul x8, x9, x8
 ; GPR-NEXT:    stp x8, x8, [x0, #48]
@@ -108,7 +112,8 @@ define void @memset_64(ptr %a, i8 %value) nounwind {
 define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
 ; ALL-LABEL: aligned_memset_16:
 ; ALL:       // %bb.0:
-; ALL-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; ALL-NEXT:    // kill: def $w1 killed $w1 def $x1
+; ALL-NEXT:    mov x8, #72340172838076673
 ; ALL-NEXT:    and x9, x1, #0xff
 ; ALL-NEXT:    mul x8, x9, x8
 ; ALL-NEXT:    stp x8, x8, [x0]
@@ -120,7 +125,8 @@ define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
 define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
 ; GPR-LABEL: aligned_memset_32:
 ; GPR:       // %bb.0:
-; GPR-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GPR-NEXT:    mov x8, #72340172838076673
 ; GPR-NEXT:    and x9, x1, #0xff
 ; GPR-NEXT:    mul x8, x9, x8
 ; GPR-NEXT:    stp x8, x8, [x0, #16]
@@ -139,7 +145,8 @@ define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
 define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
 ; GPR-LABEL: aligned_memset_64:
 ; GPR:       // %bb.0:
-; GPR-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; GPR-NEXT:    // kill: def $w1 killed $w1 def $x1
+; GPR-NEXT:    mov x8, #72340172838076673
 ; GPR-NEXT:    and x9, x1, #0xff
 ; GPR-NEXT:    mul x8, x9, x8
 ; GPR-NEXT:    stp x8, x8, [x0, #48]

diff --git a/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll b/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll
index cda064a0fc53d0..97cfb13bcd5eb3 100644
--- a/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll
+++ b/llvm/test/CodeGen/AArch64/memset-vs-memset-inline.ll
@@ -7,7 +7,8 @@ declare void @llvm.memset.inline.p0.i64(ptr nocapture, i8, i64, i1) nounwind
 define void @test1(ptr %a, i8 %value) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #72340172838076673 // =0x101010101010101
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT:    mov x8, #72340172838076673
 ; CHECK-NEXT:    and x9, x1, #0xff
 ; CHECK-NEXT:    mul x8, x9, x8
 ; CHECK-NEXT:    str x8, [x0]
@@ -19,7 +20,7 @@ define void @test1(ptr %a, i8 %value) nounwind {
 define void @regular_memset_calls_external_function(ptr %a, i8 %value) nounwind {
 ; CHECK-LABEL: regular_memset_calls_external_function:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w2, #1024 // =0x400
+; CHECK-NEXT:    mov w2, #1024
 ; CHECK-NEXT:    b memset
   tail call void @llvm.memset.p0.i64(ptr %a, i8 %value, i64 1024, i1 0)
   ret void
@@ -31,37 +32,6 @@ define void @inlined_set_doesnt_call_external_function(ptr %a, i8 %value) nounwi
 ; CHECK-NEXT:    dup v0.16b, w1
 ; CHECK-NEXT:    stp q0, q0, [x0]
 ; CHECK-NEXT:    stp q0, q0, [x0, #32]
-; CHECK-NEXT:    stp q0, q0, [x0, #64]
-; CHECK-NEXT:    stp q0, q0, [x0, #96]
-; CHECK-NEXT:    stp q0, q0, [x0, #128]
-; CHECK-NEXT:    stp q0, q0, [x0, #160]
-; CHECK-NEXT:    stp q0, q0, [x0, #192]
-; CHECK-NEXT:    stp q0, q0, [x0, #224]
-; CHECK-NEXT:    stp q0, q0, [x0, #256]
-; CHECK-NEXT:    stp q0, q0, [x0, #288]
-; CHECK-NEXT:    stp q0, q0, [x0, #320]
-; CHECK-NEXT:    stp q0, q0, [x0, #352]
-; CHECK-NEXT:    stp q0, q0, [x0, #384]
-; CHECK-NEXT:    stp q0, q0, [x0, #416]
-; CHECK-NEXT:    stp q0, q0, [x0, #448]
-; CHECK-NEXT:    stp q0, q0, [x0, #480]
-; CHECK-NEXT:    stp q0, q0, [x0, #512]
-; CHECK-NEXT:    stp q0, q0, [x0, #544]
-; CHECK-NEXT:    stp q0, q0, [x0, #576]
-; CHECK-NEXT:    stp q0, q0, [x0, #608]
-; CHECK-NEXT:    stp q0, q0, [x0, #640]
-; CHECK-NEXT:    stp q0, q0, [x0, #672]
-; CHECK-NEXT:    stp q0, q0, [x0, #704]
-; CHECK-NEXT:    stp q0, q0, [x0, #736]
-; CHECK-NEXT:    stp q0, q0, [x0, #768]
-; CHECK-NEXT:    stp q0, q0, [x0, #800]
-; CHECK-NEXT:    stp q0, q0, [x0, #832]
-; CHECK-NEXT:    stp q0, q0, [x0, #864]
-; CHECK-NEXT:    stp q0, q0, [x0, #896]
-; CHECK-NEXT:    stp q0, q0, [x0, #928]
-; CHECK-NEXT:    stp q0, q0, [x0, #960]
-; CHECK-NEXT:    stp q0, q0, [x0, #992]
-; CHECK-NEXT:    ret
   tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 1024, i1 0)
   ret void
 }

diff --git a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
index 823dd815d556c4..ea40f9e52dcd68 100644
--- a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
+++ b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir
@@ -1595,10 +1595,10 @@ body:             |
 # CHECK-NEXT: D0_HI [0B,48r:0)[192r,224r:1) 0@0B-phi 1@192r
 # CHECK-NEXT: D1_HI [0B,88r:0)[208r,224r:1) 0@0B-phi 1@208r
 # CHECK-NEXT: D2_HI [0B,96r:0) 0@0B-phi
-# CHECK-NEXT: %0 [48r,168r:0) 0@48r  L000000000000002B [48r,168r:0) 0@48r  L0000000000000004 [48r,104r:0) 0@48r  weight:0.000000e+00
+# CHECK-NEXT: %0 [48r,168r:0) 0@48r  weight:0.000000e+00
 # CHECK-NEXT: %1 [88r,120r:0) 0@88r  weight:0.000000e+00
 # CHECK-NEXT: %2 [96r,128r:0) 0@96r  weight:0.000000e+00
-# CHECK-NEXT: %3 [104r,176r:0) 0@104r  L000000000000002B [104r,176r:0) 0@104r  L0000000000000004 [104r,104d:0) 0@104r  weight:0.000000e+00
+# CHECK-NEXT: %3 [104r,176r:0) 0@104r  weight:0.000000e+00
 # CHECK-NEXT: %6 [80r,128r:0) 0@80r  weight:0.000000e+00
 # CHECK-NEXT: %7 [128r,160r:0) 0@128r  weight:0.000000e+00
 # CHECK-NEXT: %8 [120r,136r:0) 0@120r  weight:0.000000e+00

diff --git a/llvm/test/CodeGen/AArch64/mla_mls_merge.ll b/llvm/test/CodeGen/AArch64/mla_mls_merge.ll
index 5dbf522f38ba4e..17abaf387c1804 100644
--- a/llvm/test/CodeGen/AArch64/mla_mls_merge.ll
+++ b/llvm/test/CodeGen/AArch64/mla_mls_merge.ll
@@ -102,6 +102,7 @@ define <4 x i16> @test_mls0(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d)
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    umull v0.8h, v0.8b, v1.8b
 ; CHECK-NEXT:    umlsl v0.8h, v2.8b, v3.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
@@ -117,6 +118,7 @@ define <4 x i16> @test_mls1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d)
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    smull v0.8h, v0.8b, v1.8b
 ; CHECK-NEXT:    smlsl v0.8h, v2.8b, v3.8b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull.i = tail call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
@@ -132,6 +134,7 @@ define <2 x i32> @test_mls2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    umull v0.4s, v0.4h, v1.4h
 ; CHECK-NEXT:    umlsl v0.4s, v2.4h, v3.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
@@ -147,6 +150,7 @@ define <2 x i32> @test_mls3(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16>
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    smull v0.4s, v0.4h, v1.4h
 ; CHECK-NEXT:    smlsl v0.4s, v2.4h, v3.4h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull2.i = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
@@ -162,6 +166,7 @@ define <1 x i64> @test_mls4(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32>
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    umull v0.2d, v0.2s, v1.2s
 ; CHECK-NEXT:    umlsl v0.2d, v2.2s, v3.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
@@ -177,6 +182,7 @@ define <1 x i64> @test_mls5(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32>
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    smull v0.2d, v0.2s, v1.2s
 ; CHECK-NEXT:    smlsl v0.2d, v2.2s, v3.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %vmull2.i = tail call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)

diff --git a/llvm/test/CodeGen/AArch64/mul.ll b/llvm/test/CodeGen/AArch64/mul.ll
index 206561079d4beb..9ca975d9e742e1 100644
--- a/llvm/test/CodeGen/AArch64/mul.ll
+++ b/llvm/test/CodeGen/AArch64/mul.ll
@@ -423,8 +423,14 @@ entry:
 define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-SD-LABEL: v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x8, d3
 ; CHECK-SD-NEXT:    fmov x9, d0
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    fmov x10, d1
 ; CHECK-SD-NEXT:    fmov x11, d2
 ; CHECK-SD-NEXT:    mul x8, x9, x8
@@ -439,6 +445,10 @@ define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    fmov x8, d0
@@ -453,6 +463,7 @@ define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-GI-NEXT:    fmov x9, d5
 ; CHECK-GI-NEXT:    mul x8, x8, x9
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast.ll b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
index ef66d77cf4862d..d06612e2332e6e 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitcast.ll
@@ -519,6 +519,7 @@ define <2 x i16> @bitcast_i32_to_v2i16(i32 %word) {
 ; CHECK-LE:       // %bb.0:
 ; CHECK-LE-NEXT:    fmov s0, w0
 ; CHECK-LE-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: bitcast_i32_to_v2i16:
@@ -559,6 +560,7 @@ define <2 x i8> @bitcast_i16_to_v2i8(i16 %word) {
 ; CHECK-LE-NEXT:    umov w9, v0.b[1]
 ; CHECK-LE-NEXT:    fmov s0, w8
 ; CHECK-LE-NEXT:    mov v0.s[1], w9
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-LE-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: bitcast_i16_to_v2i8:

diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 0f482e2caecc1c..f6dbf5251fc272 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -1238,18 +1238,23 @@ define <4 x i32> @vselect_constant_cond_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <8 x i8> @vselect_equivalent_shuffle_v8i8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-SD-LABEL: vselect_equivalent_shuffle_v8i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    adrp x8, .LCPI89_0
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI89_0]
 ; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    adrp x8, .LCPI89_0
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI89_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %c
@@ -1265,10 +1270,12 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zero(<8 x i8> %a) {
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i8_zero:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI90_0
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI90_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %c
@@ -1286,8 +1293,9 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zero(<8 x i8> %a) {
 define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) {
 ; CHECK-SD-LABEL: vselect_equivalent_shuffle_v8i8_zeroswap:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov v0.d[1], v0.d[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    adrp x8, .LCPI91_0
+; CHECK-SD-NEXT:    mov v0.d[1], v0.d[0]
 ; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI91_0]
 ; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-SD-NEXT:    ret
@@ -1295,10 +1303,12 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) {
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i8_zeroswap:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI91_0
 ; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
 ; CHECK-GI-NEXT:    ldr d0, [x8, :lo12:.LCPI91_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v1.16b }, v0.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = shufflevector <8 x i8> zeroinitializer, <8 x i8> %a, <8 x i32> <i32 8, i32 0, i32 10, i32 1, i32 12, i32 13, i32 14, i32 15>
   ret <8 x i8> %c
@@ -1322,12 +1332,23 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) {
 ; CHECK-SD-NEXT: .byte   14
 ; CHECK-SD-NEXT: .byte   15
 define <8 x i16> @vselect_equivalent_shuffle_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: vselect_equivalent_shuffle_v8i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI92_0
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI92_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: vselect_equivalent_shuffle_v8i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    adrp x8, .LCPI92_0
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI92_0]
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI92_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI92_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ret
   %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i16> %c
 }
@@ -1351,8 +1372,9 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zero(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zero:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
 ; CHECK-GI-NEXT:    adrp x8, .LCPI93_0
+; CHECK-GI-NEXT:    movi v1.2d, #0000000000000000
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI93_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
@@ -1386,8 +1408,9 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zeroswap(<8 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zeroswap:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    movi v31.2d, #0000000000000000
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q31_q0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI94_0
+; CHECK-GI-NEXT:    movi v31.2d, #0000000000000000
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI94_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v31.16b, v0.16b }, v1.16b
 ; CHECK-GI-NEXT:    ret
@@ -1398,16 +1421,22 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zeroswap(<8 x i16> %a) {
 define <4 x i16> @vselect_equivalent_shuffle_v4i16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-SD-LABEL: vselect_equivalent_shuffle_v4i16:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-SD-NEXT:    mov v0.h[2], v1.h[1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v4i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    adrp x8, .LCPI95_0
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI95_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
   %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 3>
   ret <4 x i16> %c
@@ -1423,7 +1452,9 @@ define <4 x i32> @vselect_equivalent_shuffle_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v4i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI96_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI96_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
   %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 3>

diff --git a/llvm/test/CodeGen/AArch64/neon-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-dot-product.ll
index baab8b7111acce..cf09a46000dab9 100644
--- a/llvm/test/CodeGen/AArch64/neon-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-dot-product.ll
@@ -103,6 +103,7 @@ entry:
 define <2 x i32> @test_vdot_lane_u32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdot_lane_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    udot v0.2s, v1.8b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -116,6 +117,7 @@ entry:
 define <4 x i32> @test_vdotq_lane_u32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdotq_lane_u32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    udot v0.4s, v1.16b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -156,6 +158,7 @@ entry:
 define <2 x i32> @test_vdot_lane_u32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdot_lane_u32_zero:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    udot v0.2s, v1.8b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -170,6 +173,7 @@ entry:
 define <4 x i32> @test_vdotq_lane_u32_zero(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdotq_lane_u32_zero:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    udot v0.4s, v1.16b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -213,6 +217,7 @@ entry:
 define <2 x i32> @test_vdot_lane_s32(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdot_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sdot v0.2s, v1.8b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -226,6 +231,7 @@ entry:
 define <4 x i32> @test_vdotq_lane_s32(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdotq_lane_s32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sdot v0.4s, v1.16b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -266,6 +272,7 @@ entry:
 define <2 x i32> @test_vdot_lane_s32_zero(<2 x i32> %a, <8 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdot_lane_s32_zero:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sdot v0.2s, v1.8b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -280,6 +287,7 @@ entry:
 define <4 x i32> @test_vdotq_lane_s32_zero(<4 x i32> %a, <16 x i8> %b, <8 x i8> %c) {
 ; CHECK-LABEL: test_vdotq_lane_s32_zero:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    sdot v0.4s, v1.16b, v2.4b[1]
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
index bcf333ce9e0719..93a50ec305e1e5 100644
--- a/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extadd-extract.ll
@@ -6,6 +6,7 @@ define <4 x i16> @addls_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: addls_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    saddl v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addls_v8i8_0:
@@ -26,7 +27,9 @@ entry:
 define <4 x i16> @addws_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: addws_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    saddw v0.8h, v0.8h, v1.8b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addws_v8i8_0:
@@ -45,6 +48,7 @@ define <4 x i16> @addlu_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: addlu_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    uaddl v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addlu_v8i8_0:
@@ -65,7 +69,9 @@ entry:
 define <4 x i16> @addwu_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: addwu_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    uaddw v0.8h, v0.8h, v1.8b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addwu_v8i8_0:
@@ -84,6 +90,7 @@ define <4 x i16> @subls_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: subls_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ssubl v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subls_v8i8_0:
@@ -104,7 +111,9 @@ entry:
 define <4 x i16> @subws_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: subws_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ssubw v0.8h, v0.8h, v1.8b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subws_v8i8_0:
@@ -123,6 +132,7 @@ define <4 x i16> @sublu_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: sublu_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    usubl v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sublu_v8i8_0:
@@ -143,7 +153,9 @@ entry:
 define <4 x i16> @subwu_v8i8_0(<4 x i16> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: subwu_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    usubw v0.8h, v0.8h, v1.8b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subwu_v8i8_0:
@@ -162,6 +174,7 @@ define <4 x i16> @mulls_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: mulls_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    smull v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mulls_v8i8_0:
@@ -196,6 +209,7 @@ define <4 x i16> @mullu_v8i8_0(<8 x i8> %s0, <8 x i8> %s1) {
 ; CHECK-SD-LABEL: mullu_v8i8_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    umull v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mullu_v8i8_0:
@@ -230,6 +244,7 @@ define <2 x i32> @addls_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: addls_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    saddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addls_v4i16_0:
@@ -250,7 +265,9 @@ entry:
 define <2 x i32> @addws_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: addws_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    saddw v0.4s, v0.4s, v1.4h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addws_v4i16_0:
@@ -269,6 +286,7 @@ define <2 x i32> @addlu_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: addlu_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addlu_v4i16_0:
@@ -289,7 +307,9 @@ entry:
 define <2 x i32> @addwu_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: addwu_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    uaddw v0.4s, v0.4s, v1.4h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addwu_v4i16_0:
@@ -308,6 +328,7 @@ define <2 x i32> @subls_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: subls_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ssubl v0.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subls_v4i16_0:
@@ -328,7 +349,9 @@ entry:
 define <2 x i32> @subws_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: subws_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ssubw v0.4s, v0.4s, v1.4h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subws_v4i16_0:
@@ -347,6 +370,7 @@ define <2 x i32> @sublu_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: sublu_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    usubl v0.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sublu_v4i16_0:
@@ -367,7 +391,9 @@ entry:
 define <2 x i32> @subwu_v4i16_0(<2 x i32> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: subwu_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    usubw v0.4s, v0.4s, v1.4h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subwu_v4i16_0:
@@ -386,6 +412,7 @@ define <2 x i32> @mulls_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: mulls_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    smull v0.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mulls_v4i16_0:
@@ -420,6 +447,7 @@ define <2 x i32> @mullu_v4i16_0(<4 x i16> %s0, <4 x i16> %s1) {
 ; CHECK-SD-LABEL: mullu_v4i16_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    umull v0.4s, v0.4h, v1.4h
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mullu_v4i16_0:
@@ -454,6 +482,7 @@ define <1 x i64> @addls_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: addls_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addls_v2i32_0:
@@ -477,7 +506,9 @@ entry:
 define <1 x i64> @addws_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: addws_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    saddw v0.2d, v0.2d, v1.2s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addws_v2i32_0:
@@ -499,6 +530,7 @@ define <1 x i64> @addlu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: addlu_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addlu_v2i32_0:
@@ -522,7 +554,9 @@ entry:
 define <1 x i64> @addwu_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: addwu_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    uaddw v0.2d, v0.2d, v1.2s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: addwu_v2i32_0:
@@ -544,6 +578,7 @@ define <1 x i64> @subls_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: subls_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ssubl v0.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subls_v2i32_0:
@@ -567,7 +602,9 @@ entry:
 define <1 x i64> @subws_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: subws_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    ssubw v0.2d, v0.2d, v1.2s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subws_v2i32_0:
@@ -589,6 +626,7 @@ define <1 x i64> @sublu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: sublu_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    usubl v0.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sublu_v2i32_0:
@@ -612,7 +650,9 @@ entry:
 define <1 x i64> @subwu_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: subwu_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    usubw v0.2d, v0.2d, v1.2s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: subwu_v2i32_0:
@@ -634,6 +674,7 @@ define <1 x i64> @mulls_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: mulls_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    smull v0.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mulls_v2i32_0:
@@ -655,14 +696,24 @@ entry:
 }
 
 define <1 x i64> @mulws_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
-; CHECK-LABEL: mulws_v2i32_0:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    sshll v1.2d, v1.2s, #0
-; CHECK-NEXT:    fmov x8, d0
-; CHECK-NEXT:    fmov x9, d1
-; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: mulws_v2i32_0:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    sshll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    fmov x8, d0
+; CHECK-SD-NEXT:    fmov x9, d1
+; CHECK-SD-NEXT:    mul x8, x8, x9
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: mulws_v2i32_0:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    mul x8, x8, x9
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ret
 entry:
   %s1s = sext <2 x i32> %s1 to <2 x i64>
   %t1 = shufflevector <2 x i64> %s1s, <2 x i64> poison, <1 x i32> <i32 0>
@@ -674,6 +725,7 @@ define <1 x i64> @mullu_v2i32_0(<2 x i32> %s0, <2 x i32> %s1) {
 ; CHECK-SD-LABEL: mullu_v2i32_0:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    umull v0.2d, v0.2s, v1.2s
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mullu_v2i32_0:
@@ -695,14 +747,24 @@ entry:
 }
 
 define <1 x i64> @mulwu_v2i32_0(<1 x i64> %s0, <2 x i32> %s1) {
-; CHECK-LABEL: mulwu_v2i32_0:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
-; CHECK-NEXT:    fmov x8, d0
-; CHECK-NEXT:    fmov x9, d1
-; CHECK-NEXT:    mul x8, x8, x9
-; CHECK-NEXT:    fmov d0, x8
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: mulwu_v2i32_0:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    fmov x8, d0
+; CHECK-SD-NEXT:    fmov x9, d1
+; CHECK-SD-NEXT:    mul x8, x8, x9
+; CHECK-SD-NEXT:    fmov d0, x8
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: mulwu_v2i32_0:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT:    fmov x8, d0
+; CHECK-GI-NEXT:    fmov x9, d1
+; CHECK-GI-NEXT:    mul x8, x8, x9
+; CHECK-GI-NEXT:    fmov d0, x8
+; CHECK-GI-NEXT:    ret
 entry:
   %s1s = zext <2 x i32> %s1 to <2 x i64>
   %t1 = shufflevector <2 x i64> %s1s, <2 x i64> poison, <1 x i32> <i32 0>

diff --git a/llvm/test/CodeGen/AArch64/neon-extract.ll b/llvm/test/CodeGen/AArch64/neon-extract.ll
index a071df1fd49d6c..26c4ec4d4bdb25 100644
--- a/llvm/test/CodeGen/AArch64/neon-extract.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extract.ll
@@ -261,6 +261,7 @@ entry:
 define <4 x i16> @test_undef_vext_s16(<4 x i16> %a) {
 ; CHECK-LABEL: test_undef_vext_s16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
index 5714a142b39f2c..3f590226c47150 100644
--- a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll
@@ -50,6 +50,7 @@ define <8 x i8> @extract_2_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-NEXT:    mov w8, v1.s[3]
 ; CHECK-NEXT:    mov v0.b[6], w9
 ; CHECK-NEXT:    mov v0.b[7], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %a0 = extractelement <4 x i32> %a, i32 0
@@ -82,6 +83,10 @@ entry:
 define <16 x i8> @extract_4_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
 ; CHECK-LABEL: extract_4_v4i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
@@ -201,6 +206,8 @@ define <16 x i8> @extract_4_mixed(<4 x i16> %a, <4 x i32> %b, <4 x i32> %c, <4 x
 ; CHECK-LABEL: extract_4_mixed:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    xtn v2.4h, v2.4s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-NEXT:    xtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
@@ -260,8 +267,12 @@ entry:
 define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
 ; CHECK-LABEL: extract_4_v4i32_badindex:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    adrp x8, .LCPI5_0
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll b/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
index 4c589abff1cfcd..0f4eec4fdfda1b 100644
--- a/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
+++ b/llvm/test/CodeGen/AArch64/neon-insert-sve-elt.ll
@@ -9,7 +9,9 @@
 define <8 x i8> @test_lane0_nxv16i8(<8 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: test_lane0_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[0], v1.b[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 16 x i8> %b, i32 0
     %d = insertelement <8 x i8> %a, i8 %c, i32 0
@@ -19,7 +21,9 @@ define <8 x i8> @test_lane0_nxv16i8(<8 x i8> %a, <vscale x 16 x i8> %b) {
 define <8 x i8> @test_lane15_nxv16i8(<8 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: test_lane15_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.b[7], v1.b[15]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 16 x i8> %b, i32 15
     %d = insertelement <8 x i8> %a, i8 %c, i32 7
@@ -64,7 +68,9 @@ define <16 x i8> @test_q_lane16_nxv16i8(<16 x i8> %a, <vscale x 16 x i8> %b) {
 define <4 x half> @test_lane0_nxv8f16(<4 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: test_lane0_nxv8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x half> %b, i32 0
     %d = insertelement <4 x half> %a, half %c, i32 0
@@ -74,7 +80,9 @@ define <4 x half> @test_lane0_nxv8f16(<4 x half> %a, <vscale x 8 x half> %b) {
 define <4 x half> @test_lane7_nxv8f16(<4 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: test_lane7_nxv8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[7]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x half> %b, i32 7
     %d = insertelement <4 x half> %a, half %c, i32 3
@@ -118,7 +126,9 @@ define <8 x half> @test_q_lane8_nxv8f16(<8 x half> %a, <vscale x 8 x half> %b) {
 define <4 x bfloat> @test_lane0_nxv8bf16(<4 x bfloat> %a, <vscale x 8 x bfloat> %b) {
 ; CHECK-LABEL: test_lane0_nxv8bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x bfloat> %b, i32 0
     %d = insertelement <4 x bfloat> %a, bfloat %c, i32 0
@@ -128,7 +138,9 @@ define <4 x bfloat> @test_lane0_nxv8bf16(<4 x bfloat> %a, <vscale x 8 x bfloat>
 define <4 x bfloat> @test_lane7_nxv8bf16(<4 x bfloat> %a, <vscale x 8 x bfloat> %b) {
 ; CHECK-LABEL: test_lane7_nxv8bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[7]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x bfloat> %b, i32 7
     %d = insertelement <4 x bfloat> %a, bfloat %c, i32 3
@@ -172,7 +184,9 @@ define <8 x bfloat> @test_q_lane8_nxv8bf16(<8 x bfloat> %a, <vscale x 8 x bfloat
 define <4 x i16> @test_lane0_nxv8i16(<4 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: test_lane0_nxv8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[0], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x i16> %b, i32 0
     %d = insertelement <4 x i16> %a, i16 %c, i32 0
@@ -182,7 +196,9 @@ define <4 x i16> @test_lane0_nxv8i16(<4 x i16> %a, <vscale x 8 x i16> %b) {
 define <4 x i16> @test_lane7_nxv8i16(<4 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: test_lane7_nxv8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[7]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 8 x i16> %b, i32 7
     %d = insertelement <4 x i16> %a, i16 %c, i32 3
@@ -227,7 +243,9 @@ define <8 x i16> @test_q_lane8_nxv8i16(<8 x i16> %a, <vscale x 8 x i16> %b) {
 define <2 x float> @test_lane0_nxv4f32(<2 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: test_lane0_nxv4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 4 x float> %b, i32 0
     %d = insertelement <2 x float> %a, float %c, i32 0
@@ -237,7 +255,9 @@ define <2 x float> @test_lane0_nxv4f32(<2 x float> %a, <vscale x 4 x float> %b)
 define <2 x float> @test_lane3_nxv4f32(<2 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: test_lane3_nxv4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[3]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 4 x float> %b, i32 3
     %d = insertelement <2 x float> %a, float %c, i32 1
@@ -281,7 +301,9 @@ define <4 x float> @test_q_lane4_nxv4f32(<4 x float> %a, <vscale x 4 x float> %b
 define <2 x i32> @test_lane0_nxv4i32(<2 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: test_lane0_nxv4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 4 x i32> %b, i32 0
     %d = insertelement <2 x i32> %a, i32 %c, i32 0
@@ -291,7 +313,9 @@ define <2 x i32> @test_lane0_nxv4i32(<2 x i32> %a, <vscale x 4 x i32> %b) {
 define <2 x i32> @test_lane3_nxv4i32(<2 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: test_lane3_nxv4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[3]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 4 x i32> %b, i32 3
     %d = insertelement <2 x i32> %a, i32 %c, i32 1
@@ -337,6 +361,7 @@ define <1 x double> @test_lane0_nxv2f64(<1 x double> %a, <vscale x 2 x double> %
 ; CHECK-LABEL: test_lane0_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov v0.d[0], v1.d[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 2 x double> %b, i32 0
     %d = insertelement <1 x double> %a, double %c, i32 0
@@ -347,6 +372,7 @@ define <1 x double> @test_lane1_nxv2f64(<1 x double> %a, <vscale x 2 x double> %
 ; CHECK-LABEL: test_lane1_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov v0.d[0], v1.d[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 2 x double> %b, i32 1
     %d = insertelement <1 x double> %a, double %c, i32 0
@@ -391,6 +417,7 @@ define <1 x i64> @test_lane0_nxv2i64(<1 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: test_lane0_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov v0.d[0], v1.d[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 2 x i64> %b, i32 0
     %d = insertelement <1 x i64> %a, i64 %c, i32 0
@@ -401,6 +428,7 @@ define <1 x i64> @test_lane1_nxv2i64(<1 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: test_lane1_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov v0.d[0], v1.d[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
     %c = extractelement <vscale x 2 x i64> %b, i32 1
     %d = insertelement <1 x i64> %a, i64 %c, i32 0

diff --git a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
index 5b87142294839f..28961376d2b6d6 100644
--- a/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
+++ b/llvm/test/CodeGen/AArch64/neon-insextbitcast.ll
@@ -4,6 +4,7 @@
 define <4 x i32> @test_vins_v4i32(<4 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v4i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[3], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -15,6 +16,7 @@ entry:
 define <4 x i32> @test_vins_v4i32_0(<4 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v4i32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -26,7 +28,10 @@ entry:
 define <2 x i32> @test_vins_v2i32(<2 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v2i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = bitcast float %b to i32
@@ -37,7 +42,10 @@ entry:
 define <2 x i32> @test_vins_v2i32_0(<2 x i32> %a, float %b) {
 ; CHECK-LABEL: test_vins_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[0], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %c = bitcast float %b to i32
@@ -48,6 +56,7 @@ entry:
 define <2 x i64> @test_vins_v2i64(<2 x i64> %a, double %b) {
 ; CHECK-LABEL: test_vins_v2i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -59,6 +68,7 @@ entry:
 define <2 x i64> @test_vins_v2i64_0(<2 x i64> %a, double %b) {
 ; CHECK-LABEL: test_vins_v2i64_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[0], v1.d[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -83,6 +93,7 @@ define float @test_vext_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: test_vext_v4i32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.s[0], v0.s[3]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <4 x i32> %a, i32 3
@@ -93,6 +104,7 @@ entry:
 define float @test_vext_v4i32_0(<4 x i32> %a) {
 ; CHECK-LABEL: test_vext_v4i32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <4 x i32> %a, i32 0
@@ -103,7 +115,9 @@ entry:
 define float @test_vext_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: test_vext_v2i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[0], v0.s[1]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i32> %a, i32 1
@@ -114,6 +128,8 @@ entry:
 define float @test_vext_v2i32_0(<2 x i32> %a) {
 ; CHECK-LABEL: test_vext_v2i32_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i32> %a, i32 0
@@ -125,6 +141,7 @@ define double @test_vext_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: test_vext_v2i64:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov v0.d[0], v0.d[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i64> %a, i32 1
@@ -135,6 +152,7 @@ entry:
 define double @test_vext_v2i64_0(<2 x i64> %a) {
 ; CHECK-LABEL: test_vext_v2i64_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i64> %a, i32 0
@@ -145,6 +163,8 @@ entry:
 define double @test_vext_v1i64(<1 x i64> %a) {
 ; CHECK-LABEL: test_vext_v1i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <1 x i64> %a, i32 0

diff --git a/llvm/test/CodeGen/AArch64/neon-luti.ll b/llvm/test/CodeGen/AArch64/neon-luti.ll
index 6380bfd370a470..54366627537626 100644
--- a/llvm/test/CodeGen/AArch64/neon-luti.ll
+++ b/llvm/test/CodeGen/AArch64/neon-luti.ll
@@ -4,6 +4,8 @@
 define <16 x i8> @test_luti2_lane_i8(<8 x i8> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2_lane_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.16b, { v0.16b }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> %vn, <8 x i8> %vm, i32 0)
@@ -13,6 +15,7 @@ define <16 x i8> @test_luti2_lane_i8(<8 x i8> %vn, <8 x i8> %vm){
 define <16 x i8> @test_luti2_laneq_i8(<8 x i8> %vn, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti2_laneq_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    luti2 v0.16b, { v0.16b }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> %vn, <16 x i8> %vm, i32 0)
@@ -22,6 +25,7 @@ define <16 x i8> @test_luti2_laneq_i8(<8 x i8> %vn, <16 x i8> %vm){
 define <16 x i8> @test_luti2q_lane_i8(<16 x i8> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2q_lane_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.16b, { v0.16b }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> %vn, <8 x i8> %vm, i32 0)
@@ -40,6 +44,8 @@ define <16 x i8> @test_luti2q_laneq_i8(<16 x i8> %vn, <16 x i8> %vm){
 define <8 x i16> @test_luti2_lane_i16(<4 x i16> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2_lane_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> %vn, <8 x i8> %vm, i32 0)
@@ -49,6 +55,7 @@ define <8 x i16> @test_luti2_lane_i16(<4 x i16> %vn, <8 x i8> %vm){
 define <8 x i16> @test_luti2_laneq_i16(<4 x i16> %vn, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti2_laneq_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> %vn, <16 x i8> %vm, i32 0)
@@ -58,6 +65,8 @@ define <8 x i16> @test_luti2_laneq_i16(<4 x i16> %vn, <16 x i8> %vm){
 define <8 x i16> @test_luti2q_lane_i16(<4 x i16> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2q_lane_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<4 x i16> %vn, <8 x i8> %vm, i32 0)
@@ -76,6 +85,8 @@ define <8 x i16> @test_luti2q_laneq_i16(<8 x i16> %vn, <16 x i8> %vm){
 define <8 x half> @test_luti2_lane_f16(<4 x half> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2_lane_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16(<4 x half> %vn, <8 x i8> %vm, i32 0)
@@ -85,6 +96,7 @@ define <8 x half> @test_luti2_lane_f16(<4 x half> %vn, <8 x i8> %vm){
 define <8 x half> @test_luti2_laneq_f16(<4 x half> %vn, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti2_laneq_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v4i16(<4 x half> %vn, <16 x i8> %vm, i32 0)
@@ -94,6 +106,7 @@ define <8 x half> @test_luti2_laneq_f16(<4 x half> %vn, <16 x i8> %vm){
 define <8 x half> @test_luti2q_lane_f16(<8 x half> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2q_lane_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16(<8 x half> %vn, <8 x i8> %vm, i32 0)
@@ -112,6 +125,8 @@ define <8 x half> @test_luti2q_laneq_f16(<8 x half> %vn, <16 x i8> %vm){
 define <8 x bfloat> @test_luti2_lane_bf16(<4 x bfloat> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2_lane_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16(<4 x bfloat> %vn, <8 x i8> %vm, i32 0)
@@ -121,6 +136,7 @@ define <8 x bfloat> @test_luti2_lane_bf16(<4 x bfloat> %vn, <8 x i8> %vm){
 define <8 x bfloat> @test_luti2_laneq_bf16(<4 x bfloat> %vn, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti2_laneq_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16(<4 x bfloat> %vn, <16 x i8> %vm, i32 0)
@@ -130,6 +146,8 @@ define <8 x bfloat> @test_luti2_laneq_bf16(<4 x bfloat> %vn, <16 x i8> %vm){
 define <8 x bfloat> @test_luti2q_lane_bf16(<4 x bfloat> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti2q_lane_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti2 v0.8h, { v0.8h }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16(<4 x bfloat> %vn, <8 x i8> %vm, i32 0)
@@ -148,6 +166,7 @@ define <8 x bfloat> @test_luti2q_laneq_bf16(<8 x bfloat> %vn, <16 x i8> %vm){
 define <16 x i8> @test_luti4q_lane_i8(<16 x i8> %vn, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_lane_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    luti4 v0.16b, { v0.16b }, v1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> %vn, <8 x i8> %vm, i32 0)
@@ -166,6 +185,9 @@ define <16 x i8> @test_luti4q_laneq_i8(<16 x i8> %vn, <16 x i8> %vm){
 define <8 x i16> @test_luti4q_lane_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_lane_x2_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i8> %vm, i32 1)
@@ -175,6 +197,8 @@ define <8 x i16> @test_luti4q_lane_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <8 x i
 define <8 x i16> @test_luti4q_laneq_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_laneq_x2_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x i8> %vm, i32 1)
@@ -184,6 +208,9 @@ define <8 x i16> @test_luti4q_laneq_x2_i16(<8 x i16> %vn1, <8 x i16> %vn2, <16 x
 define <8 x half> @test_luti4q_lane_x2_f16(<8 x half>%vn1, <8 x half> %vn2, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_lane_x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x half> @llvm.aarch64.neon.vluti4q.lane.x2.v8f16(<8 x half> %vn1, <8 x half> %vn2, <8 x i8> %vm, i32 1)
@@ -194,6 +221,8 @@ define <8 x half> @test_luti4q_lane_x2_f16(<8 x half>%vn1, <8 x half> %vn2, <8 x
 define <8 x half> @test_luti4q_laneq_x2_f16(<8 x half>%vn1, <8 x half> %vn2, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_laneq_x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x half> @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16(<8 x half> %vn1, <8 x half> %vn2, <16 x i8> %vm, i32 1)
@@ -203,6 +232,8 @@ define <8 x half> @test_luti4q_laneq_x2_f16(<8 x half>%vn1, <8 x half> %vn2, <16
 define <8 x bfloat> @test_luti4q_laneq_x2_bf16(<8 x bfloat>%vn1, <8 x bfloat> %vn2, <16 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_laneq_x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <16 x i8> %vm, i32 1)
@@ -212,6 +243,9 @@ define <8 x bfloat> @test_luti4q_laneq_x2_bf16(<8 x bfloat>%vn1, <8 x bfloat> %v
 define <8 x bfloat> @test_luti4q_lane_x2_bf16(<8 x bfloat>%vn1, <8 x bfloat> %vn2, <8 x i8> %vm){
 ; CHECK-LABEL: test_luti4q_lane_x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    luti4 v0.8h, { v0.8h, v1.8h }, v2[1]
 ; CHECK-NEXT:    ret
    %res= tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> %vn1, <8 x bfloat> %vn2, <8 x i8> %vm, i32 1)

diff --git a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
index f2d6f1dc042d1b..c1b9a4c9dbb797 100644
--- a/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
+++ b/llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll
@@ -35,6 +35,7 @@ define <2 x i32> @udot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
 ; CHECK-NODOT-LABEL: udot_narrow:
 ; CHECK-NODOT:       // %bb.0:
 ; CHECK-NODOT-NEXT:    umull v1.8h, v2.8b, v1.8b
+; CHECK-NODOT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NODOT-NEXT:    ushll v2.4s, v1.4h, #0
 ; CHECK-NODOT-NEXT:    ushll2 v3.4s, v1.8h, #0
 ; CHECK-NODOT-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
@@ -84,6 +85,7 @@ define <2 x i32> @sdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) {
 ; CHECK-NODOT-LABEL: sdot_narrow:
 ; CHECK-NODOT:       // %bb.0:
 ; CHECK-NODOT-NEXT:    smull v1.8h, v2.8b, v1.8b
+; CHECK-NODOT-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NODOT-NEXT:    sshll v2.4s, v1.4h, #0
 ; CHECK-NODOT-NEXT:    sshll2 v3.4s, v1.8h, #0
 ; CHECK-NODOT-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
@@ -131,6 +133,7 @@ define <2 x i32> @usdot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
 ; CHECK-NOI8MM:       // %bb.0:
 ; CHECK-NOI8MM-NEXT:    ushll v1.8h, v1.8b, #0
 ; CHECK-NOI8MM-NEXT:    sshll v2.8h, v2.8b, #0
+; CHECK-NOI8MM-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOI8MM-NEXT:    smull v3.4s, v2.4h, v1.4h
 ; CHECK-NOI8MM-NEXT:    smull2 v4.4s, v2.8h, v1.8h
 ; CHECK-NOI8MM-NEXT:    ext v5.16b, v1.16b, v1.16b, #8
@@ -184,6 +187,7 @@ define <2 x i32> @sudot_narrow(<2 x i32> %acc, <8 x i8> %u, <8 x i8> %s) #0{
 ; CHECK-NOI8MM:       // %bb.0:
 ; CHECK-NOI8MM-NEXT:    sshll v1.8h, v1.8b, #0
 ; CHECK-NOI8MM-NEXT:    ushll v2.8h, v2.8b, #0
+; CHECK-NOI8MM-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOI8MM-NEXT:    smull v3.4s, v2.4h, v1.4h
 ; CHECK-NOI8MM-NEXT:    smull2 v4.4s, v2.8h, v1.8h
 ; CHECK-NOI8MM-NEXT:    ext v5.16b, v1.16b, v1.16b, #8
@@ -382,6 +386,7 @@ define <2 x i32> @not_udot_narrow(<2 x i32> %acc, <4 x i8> %u, <4 x i8> %s) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    bic v1.4h, #255, lsl #8
 ; CHECK-NEXT:    bic v2.4h, #255, lsl #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umull v3.4s, v2.4h, v1.4h
 ; CHECK-NEXT:    umlal v0.4s, v2.4h, v1.4h
 ; CHECK-NEXT:    ext v1.16b, v3.16b, v3.16b, #8

diff --git a/llvm/test/CodeGen/AArch64/neon-perm.ll b/llvm/test/CodeGen/AArch64/neon-perm.ll
index 55609000f79af8..7218204ba844ca 100644
--- a/llvm/test/CodeGen/AArch64/neon-perm.ll
+++ b/llvm/test/CodeGen/AArch64/neon-perm.ll
@@ -1800,6 +1800,7 @@ define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) {
 ; CHECK-GI-LABEL: test_vzip1_v4i8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
  %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i8> %lo
@@ -3996,6 +3997,7 @@ define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    xtn v2.8b, v0.8h
 ; CHECK-SD-NEXT:    uzp2 v1.16b, v0.16b, v0.16b
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    fmov d0, d2
 ; CHECK-SD-NEXT:    ret
 ;
@@ -4003,6 +4005,7 @@ define %struct.uint8x8x2_t @test_uzp(<16 x i8> %y) {
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    uzp1 v2.16b, v0.16b, v0.16b
 ; CHECK-GI-NEXT:    uzp2 v1.16b, v0.16b, v0.16b
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-GI-NEXT:    fmov d0, d2
 ; CHECK-GI-NEXT:    ret
 

diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
index e1c7dbc79566c5..abdfb996fa1668 100644
--- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll
@@ -32,8 +32,9 @@ define <4 x i32> @v4i32(<4 x i32> %a) {
 ; CHECK-GI-LABEL: v4i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI2_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI2_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -60,8 +61,9 @@ define <8 x i16> @v8i16(<8 x i16> %a) {
 ; CHECK-GI-LABEL: v8i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI4_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -69,12 +71,23 @@ entry:
 }
 
 define <8 x i16> @v8i16_2(<4 x i16> %a, <4 x i16> %b) {
-; CHECK-LABEL: v8i16_2:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    adrp x8, .LCPI5_0
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: v8i16_2:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    adrp x8, .LCPI5_0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI5_0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: v8i16_2:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI5_0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI5_0]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <8 x i16> %V128
@@ -100,8 +113,9 @@ define <16 x i8> @v16i8(<16 x i8> %a) {
 ; CHECK-GI-LABEL: v16i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI7_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI7_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -109,12 +123,23 @@ entry:
 }
 
 define <16 x i8> @v16i8_2(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: v16i8_2:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    adrp x8, .LCPI8_0
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: v16i8_2:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    adrp x8, .LCPI8_0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: v16i8_2:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    adrp x8, .LCPI8_0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ret <16 x i8> %V128
@@ -150,8 +175,9 @@ define <4 x float> @v4f32(<4 x float> %a) {
 ; CHECK-GI-LABEL: v4f32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI11_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI11_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -178,8 +204,9 @@ define <8 x half> @v8f16(<8 x half> %a) {
 ; CHECK-GI-LABEL: v8f16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    adrp x8, .LCPI13_0
-; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI13_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
 entry:
   %V128 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

diff --git a/llvm/test/CodeGen/AArch64/neon-rshrn.ll b/llvm/test/CodeGen/AArch64/neon-rshrn.ll
index 2fe3c092aab26d..e648b10ea357bd 100644
--- a/llvm/test/CodeGen/AArch64/neon-rshrn.ll
+++ b/llvm/test/CodeGen/AArch64/neon-rshrn.ll
@@ -850,6 +850,7 @@ entry:
 define void @rshrn_v4i16_5(<4 x i16> %a, ptr %p) {
 ; CHECK-LABEL: rshrn_v4i16_5:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rshrn v0.8b, v0.8h, #5
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    ret
@@ -883,6 +884,7 @@ entry:
 define void @rshrn_v1i64_5(<1 x i64> %a, ptr %p) {
 ; CHECK-LABEL: rshrn_v1i64_5:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    rshrn v0.2s, v0.2d, #5
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
index 2d28d80c353346..544d7680f01b80 100644
--- a/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
+++ b/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -51,6 +51,7 @@ define float @test_fmla_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
 define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmla_ss2S_0:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -61,6 +62,7 @@ define float @test_fmla_ss2S_0(float %a, float %b, <2 x float> %v) {
 define float @test_fmla_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmla_ss2S_0_swap:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd s0, s2, s1, s0
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -71,6 +73,7 @@ define float @test_fmla_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
 define float @test_fmla_ss2S_1(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmla_ss2S_1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 1
@@ -114,6 +117,7 @@ define float @test_fmla_ss4S_0_ext0(float %a, <4 x float> %v, <4 x float> %w) {
 define float @test_fmla_ss2S_3_ext0(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmla_ss2S_3_ext0:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmla s0, s1, v1.s[1]
 ; CHECK-NEXT:    ret
   %tmp0 = extractelement <2 x float> %v, i32 0
@@ -125,6 +129,7 @@ define float @test_fmla_ss2S_3_ext0(float %a, <2 x float> %v) {
 define float @test_fmla_ss2S_3_ext0_swp(float %a, <2 x float> %v) {
 ; CHECK-LABEL: test_fmla_ss2S_3_ext0_swp:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmla s0, s1, v1.s[1]
 ; CHECK-NEXT:    ret
   %tmp0 = extractelement <2 x float> %v, i32 0
@@ -136,6 +141,8 @@ define float @test_fmla_ss2S_3_ext0_swp(float %a, <2 x float> %v) {
 define float @test_fmla_ss2S_0_ext0(float %a, <2 x float> %v, <2 x float> %w) {
 ; CHECK-LABEL: test_fmla_ss2S_0_ext0:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-NEXT:    ret
   %tmp0 = extractelement <2 x float> %v, i32 0
@@ -288,6 +295,7 @@ define float @test_fmls_ss4S_3_swap(float %a, float %b, <4 x float> %v) {
 define float @test_fmls_ss2S_0(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmls_ss2S_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub s0, s2, s1, s0
 ; CHECK-NEXT:    ret
 entry:
@@ -300,6 +308,7 @@ entry:
 define float @test_fmls_ss2S_0_swap(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmls_ss2S_0_swap:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub s0, s2, s1, s0
 ; CHECK-NEXT:    ret
 entry:
@@ -312,6 +321,7 @@ entry:
 define float @test_fmls_ss2S_1(float %a, float %b, <2 x float> %v) {
 ; CHECK-LABEL: test_fmls_ss2S_1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov s1, v2.s[1]
 ; CHECK-NEXT:    fmls s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret
@@ -484,6 +494,7 @@ define float @test_fmla_ss4S_3_swap_strict(float %a, float %b, <4 x float> %v) #
 define float @test_fmla_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmla_ss2S_0_strict:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd s0, s1, s2, s0
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -494,6 +505,7 @@ define float @test_fmla_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
 define float @test_fmla_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmla_ss2S_0_swap_strict:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmadd s0, s2, s1, s0
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 0
@@ -504,6 +516,7 @@ define float @test_fmla_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #
 define float @test_fmla_ss2S_1_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmla_ss2S_1_strict:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmla s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret
   %tmp1 = extractelement <2 x float> %v, i32 1
@@ -622,6 +635,7 @@ define float @test_fmls_ss4S_3_swap_strict(float %a, float %b, <4 x float> %v) #
 define float @test_fmls_ss2S_0_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmls_ss2S_0_strict:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub s0, s2, s1, s0
 ; CHECK-NEXT:    ret
 entry:
@@ -634,6 +648,7 @@ entry:
 define float @test_fmls_ss2S_0_swap_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmls_ss2S_0_swap_strict:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fmsub s0, s2, s1, s0
 ; CHECK-NEXT:    ret
 entry:
@@ -646,6 +661,7 @@ entry:
 define float @test_fmls_ss2S_1_strict(float %a, float %b, <2 x float> %v) #0 {
 ; CHECK-LABEL: test_fmls_ss2S_1_strict:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    mov s1, v2.s[1]
 ; CHECK-NEXT:    fmls s0, s1, v2.s[1]
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll b/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
index 941c8b57fe42cb..e59d9098a30d65 100644
--- a/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
+++ b/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll
@@ -6,6 +6,7 @@
 define void @histogram_i64(<2 x ptr> %buckets, i64 %inc, <2 x i1> %mask) {
 ; CHECK-LABEL: histogram_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fmov w8, s1
 ; CHECK-NEXT:    tbnz w8, #0, .LBB0_3
 ; CHECK-NEXT:  // %bb.1: // %else
@@ -35,6 +36,7 @@ define void @histogram_i32_literal(ptr %base, <4 x i32> %indices, <4 x i1> %mask
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    dup v2.2d, x0
 ; CHECK-NEXT:    sshll v3.2d, v0.2s, #2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    umov w8, v1.h[0]
 ; CHECK-NEXT:    add v3.2d, v2.2d, v3.2d
 ; CHECK-NEXT:    tbz w8, #0, .LBB1_2

diff --git a/llvm/test/CodeGen/AArch64/neon-shuffle-vector-tbl.ll b/llvm/test/CodeGen/AArch64/neon-shuffle-vector-tbl.ll
index 9e804389432299..64ba7b2b4ee942 100644
--- a/llvm/test/CodeGen/AArch64/neon-shuffle-vector-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/neon-shuffle-vector-tbl.ll
@@ -67,6 +67,7 @@ define <8 x i8> @shuffle8_with_and_mask(<8 x i8> %src, <8 x i8> %mask) {
 ; CHECK-LABEL: shuffle8_with_and_mask:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi.8b v2, #7
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    and.8b v1, v1, v2
 ; CHECK-NEXT:    tbl.8b v0, { v0 }, v1
 ; CHECK-NEXT:    ret
@@ -113,6 +114,7 @@ define <8 x i8> @shuffle8_with_and_mask_different_constants(<8 x i8> %src, <8 x
 ; CHECK-LABEL: shuffle8_with_and_mask_different_constants:
 ; CHECK:        // %bb.0:
 ; CHECK-NEXT:   adrp	x8, .LCPI2_0
+; CHECK-NEXT:   // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:   ldr	d2, [x8, :lo12:.LCPI2_0]
 ; CHECK-NEXT:   and.8b	v1, v1, v2
 ; CHECK-NEXT:   tbl.8b	v0, { v0 }, v1
@@ -149,6 +151,7 @@ define <8 x i8> @shuffle8_with_and_mask_different_constants(<8 x i8> %src, <8 x
 define <8 x i8> @shuffle8_with_mask(<8 x i8> %src, <8 x i8> %mask) {
 ; CHECK-LABEL: shuffle8_with_mask:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:  // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:  tbl.8b v0, { v0 }, v1
 ; CHECK-NEXT:  ret
 

diff --git a/llvm/test/CodeGen/AArch64/neon-truncstore.ll b/llvm/test/CodeGen/AArch64/neon-truncstore.ll
index 96806f3a72a54c..5d78ad24eb333d 100644
--- a/llvm/test/CodeGen/AArch64/neon-truncstore.ll
+++ b/llvm/test/CodeGen/AArch64/neon-truncstore.ll
@@ -41,6 +41,7 @@ define void @v8i64_v8i32(<8 x i64> %a, ptr %result) {
 define void @v2i32_v2i16(<2 x i32> %a, ptr %result) {
 ; CHECK-LABEL: v2i32_v2i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strh w9, [x0]
@@ -88,6 +89,7 @@ define void @v16i32_v16i16(<16 x i32> %a, ptr %result) {
 define void @v2i32_v2i8(<2 x i32> %a, ptr %result) {
 ; CHECK-LABEL: v2i32_v2i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strb w9, [x0]
@@ -154,6 +156,7 @@ define void @v32i32_v32i8(<32 x i32> %a, ptr %result) {
 define void @v2i16_v2i8(<2 x i16> %a, ptr %result) {
 ; CHECK-LABEL: v2i16_v2i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v0.s[1]
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    strb w9, [x0]

diff --git a/llvm/test/CodeGen/AArch64/neon-vcmla.ll b/llvm/test/CodeGen/AArch64/neon-vcmla.ll
index 1f9e3ab6155ac3..a648256f95a8d4 100644
--- a/llvm/test/CodeGen/AArch64/neon-vcmla.ll
+++ b/llvm/test/CodeGen/AArch64/neon-vcmla.ll
@@ -14,6 +14,7 @@ entry:
 define <4 x half> @test_16x4_lane_1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; CHECK-LABEL: test_16x4_lane_1:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[1], #0
 ; CHECK-NEXT:    ret
 entry:
@@ -37,6 +38,7 @@ entry:
 define <4 x half> @test_rot90_16x4_lane_0(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
 ; CHECK-LABEL: test_rot90_16x4_lane_0:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    fcmla v0.4h, v1.4h, v2.h[0], #90
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/neon-wide-splat.ll b/llvm/test/CodeGen/AArch64/neon-wide-splat.ll
index b0f8036879e893..8f05fd6cb76bd9 100644
--- a/llvm/test/CodeGen/AArch64/neon-wide-splat.ll
+++ b/llvm/test/CodeGen/AArch64/neon-wide-splat.ll
@@ -4,6 +4,7 @@
 define <4 x i16> @shuffle1(<4 x i16> %v) {
 ; CHECK-LABEL: shuffle1:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
 entry:
@@ -14,6 +15,7 @@ entry:
 define <4 x i16> @shuffle2(<4 x i16> %v) {
 ; CHECK-LABEL: shuffle2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -71,6 +73,7 @@ entry:
 define <8 x i8> @shuffle7(<8 x i8> %v) {
 ; CHECK-LABEL: shuffle7:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
 entry:
@@ -82,6 +85,7 @@ entry:
 define <8 x i8> @shuffle8(<8 x i8> %v) {
 ; CHECK-LABEL: shuffle8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[3]
 ; CHECK-NEXT:    ret
 entry:
@@ -95,6 +99,7 @@ define <8 x i8> @shuffle_not1(<16 x i8> %v) {
 ; CHECK-LABEL: shuffle_not1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   ret <8 x i8> %res
@@ -126,8 +131,9 @@ entry:
 define <8 x i8> @shuffle_not4(<8 x i8> %v) {
 ; CHECK-LABEL: shuffle_not4:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov v0.d[1], v0.d[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    adrp x8, .LCPI11_0
+; CHECK-NEXT:    mov v0.d[1], v0.d[0]
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI11_0]
 ; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
index d315c306aa37a0..afcced5dcb9ab5 100644
--- a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll
@@ -137,7 +137,9 @@ define <8 x i16> @shuffle_widen_faili1(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: shuffle_widen_faili1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI12_0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI12_0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -150,7 +152,9 @@ define <8 x i16> @shuffle_widen_fail2(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: shuffle_widen_fail2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI13_0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT:    ret
 entry:
@@ -163,7 +167,9 @@ define <8 x i16> @shuffle_widen_fail3(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: shuffle_widen_fail3:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
index 1b55e733dfc831..ed84ec2ba9eaeb 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal-load.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
@@ -545,6 +545,10 @@ define <5 x double> @test_ldnp_v5f64(ptr %A) {
 ; CHECK-NEXT:    ldr d4, [x0, #32]
 ; CHECK-NEXT:    ext.16b v1, v0, v0, #8
 ; CHECK-NEXT:    ext.16b v3, v2, v2, #8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 killed $q2
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    ; kill: def $d3 killed $d3 killed $q3
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: test_ldnp_v5f64:
@@ -553,8 +557,13 @@ define <5 x double> @test_ldnp_v5f64(ptr %A) {
 ; CHECK-BE-NEXT:    ld1 { v0.2d }, [x0]
 ; CHECK-BE-NEXT:    ldr d4, [x0, #32]
 ; CHECK-BE-NEXT:    ld1 { v2.2d }, [x8]
+; CHECK-BE-NEXT:    // kill: def $d4 killed $d4 killed $q4
 ; CHECK-BE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-BE-NEXT:    ext v3.16b, v2.16b, v2.16b, #8
+; CHECK-BE-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-BE-NEXT:    // kill: def $d2 killed $d2 killed $q2
+; CHECK-BE-NEXT:    // kill: def $d3 killed $d3 killed $q3
 ; CHECK-BE-NEXT:    ret
   %lv = load<5 x double>, ptr %A, align 8, !nontemporal !0
   ret <5 x double> %lv

diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll
index 06ee0643c4016f..f8ba150a0405ff 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal.ll
@@ -64,6 +64,7 @@ define void @test_stnp_v16i8(ptr %p, <16 x i8> %v) #0 {
 define void @test_stnp_v2i32(ptr %p, <2 x i32> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2i32:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -79,6 +80,7 @@ define void @test_stnp_v2i32(ptr %p, <2 x i32> %v) #0 {
 define void @test_stnp_v4i16(ptr %p, <4 x i16> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v4i16:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -94,6 +96,7 @@ define void @test_stnp_v4i16(ptr %p, <4 x i16> %v) #0 {
 define void @test_stnp_v8i8(ptr %p, <8 x i8> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v8i8:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -139,6 +142,7 @@ define void @test_stnp_v4f32(ptr %p, <4 x float> %v) #0 {
 define void @test_stnp_v2f32(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -154,6 +158,7 @@ define void @test_stnp_v2f32(ptr %p, <2 x float> %v) #0 {
 define void @test_stnp_v1f64(ptr %p, <1 x double> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v1f64:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -169,6 +174,7 @@ define void @test_stnp_v1f64(ptr %p, <1 x double> %v) #0 {
 define void @test_stnp_v1i64(ptr %p, <1 x i64> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v1i64:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0]
 ; CHECK-LE-NEXT:    ret
@@ -232,6 +238,7 @@ define void @test_stnp_v2f64_offset_neg(ptr %p, <2 x double> %v) #0 {
 define void @test_stnp_v2f32_offset(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_offset:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0, #8]
 ; CHECK-LE-NEXT:    ret
@@ -248,6 +255,7 @@ define void @test_stnp_v2f32_offset(ptr %p, <2 x float> %v) #0 {
 define void @test_stnp_v2f32_offset_neg(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0, #-8]
 ; CHECK-LE-NEXT:    ret
@@ -418,6 +426,7 @@ define void @test_stnp_v4f32_offset_neg_512(ptr %p, <4 x float> %v) #0 {
 define void @test_stnp_v2f32_invalid_offset_256(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_256:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    add x8, x0, #256
 ; CHECK-LE-NEXT:    stnp s0, s1, [x8]
@@ -435,6 +444,7 @@ define void @test_stnp_v2f32_invalid_offset_256(ptr %p, <2 x float> %v) #0 {
 define void @test_stnp_v2f32_offset_252(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_offset_252:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0, #252]
 ; CHECK-LE-NEXT:    ret
@@ -451,6 +461,7 @@ define void @test_stnp_v2f32_offset_252(ptr %p, <2 x float> %v) #0 {
 define void @test_stnp_v2f32_invalid_offset_neg_260(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    sub x8, x0, #260
 ; CHECK-LE-NEXT:    stnp s0, s1, [x8]
@@ -469,6 +480,7 @@ define void @test_stnp_v2f32_invalid_offset_neg_260(ptr %p, <2 x float> %v) #0 {
 define void @test_stnp_v2f32_offset_neg_256(ptr %p, <2 x float> %v) #0 {
 ; CHECK-LE-LABEL: test_stnp_v2f32_offset_neg_256:
 ; CHECK-LE:       // %bb.0:
+; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-LE-NEXT:    mov s1, v0.s[1]
 ; CHECK-LE-NEXT:    stnp s0, s1, [x0, #-256]
 ; CHECK-LE-NEXT:    ret
@@ -629,9 +641,17 @@ entry:
 define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) {
 ; CHECK-LE-LABEL: test_stnp_v17f32:
 ; CHECK-LE:       // %bb.0: // %entry
+; CHECK-LE-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-LE-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-LE-NEXT:    ldr s16, [sp, #32]
-; CHECK-LE-NEXT:    mov v4.s[1], v5.s[0]
+; CHECK-LE-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-LE-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-LE-NEXT:    add x8, sp, #40
+; CHECK-LE-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-LE-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-LE-NEXT:    // kill: def $s7 killed $s7 def $q7
+; CHECK-LE-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-LE-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-LE-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-LE-NEXT:    ldr s5, [sp]
 ; CHECK-LE-NEXT:    ld1 { v16.s }[1], [x8]
@@ -663,11 +683,19 @@ define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) {
 ;
 ; CHECK-BE-LABEL: test_stnp_v17f32:
 ; CHECK-BE:       // %bb.0: // %entry
+; CHECK-BE-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-BE-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-BE-NEXT:    ldr s16, [sp, #36]
+; CHECK-BE-NEXT:    // kill: def $s5 killed $s5 def $q5
+; CHECK-BE-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-BE-NEXT:    ldr s17, [sp, #4]
 ; CHECK-BE-NEXT:    add x8, sp, #44
 ; CHECK-BE-NEXT:    mov v4.s[1], v5.s[0]
 ; CHECK-BE-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-BE-NEXT:    // kill: def $s6 killed $s6 def $q6
+; CHECK-BE-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-BE-NEXT:    // kill: def $s7 killed $s7 def $q7
+; CHECK-BE-NEXT:    // kill: def $s3 killed $s3 def $q3
 ; CHECK-BE-NEXT:    ldr s1, [sp, #68]
 ; CHECK-BE-NEXT:    ld1 { v16.s }[1], [x8]
 ; CHECK-BE-NEXT:    add x8, sp, #12

diff --git a/llvm/test/CodeGen/AArch64/phi.ll b/llvm/test/CodeGen/AArch64/phi.ll
index b215a222b7fc9d..55942d0e421bb9 100644
--- a/llvm/test/CodeGen/AArch64/phi.ll
+++ b/llvm/test/CodeGen/AArch64/phi.ll
@@ -302,6 +302,8 @@ define <2 x i8> @tv2i8(i1 %c, ptr %p, <2 x i8> %a, <2 x i8> %b) {
 ;
 ; CHECK-GI-LABEL: tv2i8:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    tbz w0, #0, .LBB10_2
 ; CHECK-GI-NEXT:  // %bb.1: // %t
 ; CHECK-GI-NEXT:    mov w8, v0.s[1]
@@ -317,6 +319,7 @@ define <2 x i8> @tv2i8(i1 %c, ptr %p, <2 x i8> %a, <2 x i8> %b) {
 ; CHECK-GI-NEXT:    umov w9, v1.b[1]
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w9
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
     br i1 %c, label %t, label %e
@@ -389,10 +392,12 @@ define <4 x i8> @tv4i8(i1 %c, ptr %p, <4 x i8> %a, <4 x i8> %b) {
 ; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-GI-NEXT:    str wzr, [x1]
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ; CHECK-GI-NEXT:  .LBB12_2:
 ; CHECK-GI-NEXT:    uzp1 v0.8b, v1.8b, v0.8b
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
     br i1 %c, label %t, label %e
@@ -497,10 +502,12 @@ define <2 x i16> @tv2i16(i1 %c, ptr %p, <2 x i16> %a, <2 x i16> %b) {
 ; CHECK-GI-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
 ; CHECK-GI-NEXT:    str wzr, [x1]
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ; CHECK-GI-NEXT:  .LBB16_2:
 ; CHECK-GI-NEXT:    uzp1 v0.4h, v1.4h, v0.4h
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
     br i1 %c, label %t, label %e
@@ -739,6 +746,8 @@ define <3 x i64> @tv3i64(i1 %c, ptr %p, <3 x i64> %a, <3 x i64> %b) {
 ;
 ; CHECK-GI-LABEL: tv3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    tbz w0, #0, .LBB26_2
 ; CHECK-GI-NEXT:  // %bb.1: // %t
 ; CHECK-GI-NEXT:    fmov d6, d0
@@ -746,14 +755,18 @@ define <3 x i64> @tv3i64(i1 %c, ptr %p, <3 x i64> %a, <3 x i64> %b) {
 ; CHECK-GI-NEXT:    str wzr, [x1]
 ; CHECK-GI-NEXT:    mov v6.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.16b, v7.16b
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v6.d[1]
 ; CHECK-GI-NEXT:    mov v0.16b, v6.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ; CHECK-GI-NEXT:  .LBB26_2:
 ; CHECK-GI-NEXT:    fmov d0, d3
 ; CHECK-GI-NEXT:    fmov d2, d5
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
     br i1 %c, label %t, label %e
@@ -1195,6 +1208,8 @@ define <3 x double> @tv3f64(i1 %c, ptr %p, <3 x double> %a, <3 x double> %b) {
 ;
 ; CHECK-GI-LABEL: tv3f64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    tbz w0, #0, .LBB42_2
 ; CHECK-GI-NEXT:  // %bb.1: // %t
 ; CHECK-GI-NEXT:    fmov d6, d0
@@ -1202,14 +1217,18 @@ define <3 x double> @tv3f64(i1 %c, ptr %p, <3 x double> %a, <3 x double> %b) {
 ; CHECK-GI-NEXT:    str wzr, [x1]
 ; CHECK-GI-NEXT:    mov v6.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    mov v2.16b, v7.16b
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov d1, v6.d[1]
 ; CHECK-GI-NEXT:    mov v0.16b, v6.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 ; CHECK-GI-NEXT:  .LBB42_2:
 ; CHECK-GI-NEXT:    fmov d0, d3
 ; CHECK-GI-NEXT:    fmov d2, d5
+; CHECK-GI-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-GI-NEXT:    mov v0.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
     br i1 %c, label %t, label %e

diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index 86a48e956a7502..f9f1cd4b1fcf76 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -126,6 +126,7 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
 ; GISEL-NEXT:    mov w8, v0.s[0]
 ; GISEL-NEXT:    fmov w9, s1
 ; GISEL-NEXT:    add x0, x8, w9, uxtw
+; GISEL-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; GISEL-NEXT:    ret
 ;
 ; GISELO0-LABEL: popcount256:

diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll
index f670d3f171ec7d..495541e9fd949d 100644
--- a/llvm/test/CodeGen/AArch64/pow.ll
+++ b/llvm/test/CodeGen/AArch64/pow.ll
@@ -75,10 +75,13 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    fmov s1, #0.25000000
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    fmov s1, #0.25000000
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -86,6 +89,7 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[3]
@@ -93,6 +97,7 @@ define <4 x float> @pow_v4f32_one_fourth_not_enough_fmf(<4 x float> %x) nounwind
 ; CHECK-NEXT:    fmov s1, #0.25000000
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -112,10 +117,13 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl pow
 ; CHECK-NEXT:    fmov d1, #0.25000000
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl pow
 ; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    add sp, sp, #48

diff --git a/llvm/test/CodeGen/AArch64/pr-cf624b2.ll b/llvm/test/CodeGen/AArch64/pr-cf624b2.ll
index b169a709862e23..ea9588e9e3db7d 100644
--- a/llvm/test/CodeGen/AArch64/pr-cf624b2.ll
+++ b/llvm/test/CodeGen/AArch64/pr-cf624b2.ll
@@ -12,6 +12,7 @@ define linkonce_odr void @_ZN1y2beEPiRK1vPmPS1_(<8 x i8> %0, ptr %agg.tmp.i) {
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    movi v1.2d, #0000000000000000
 ; CHECK-NEXT:    orr x9, x8, #0xf
 ; CHECK-NEXT:    orr x10, x8, #0xe

diff --git a/llvm/test/CodeGen/AArch64/pr58350.ll b/llvm/test/CodeGen/AArch64/pr58350.ll
index 725d2ddaae6ca9..f7efab1ff66b33 100644
--- a/llvm/test/CodeGen/AArch64/pr58350.ll
+++ b/llvm/test/CodeGen/AArch64/pr58350.ll
@@ -12,6 +12,7 @@ define void @f(<1 x float> %a, i64 %b) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT:    bfi x9, x0, #2, #1
 ; CHECK-NEXT:    str d1, [sp]

diff --git a/llvm/test/CodeGen/AArch64/pr58431.ll b/llvm/test/CodeGen/AArch64/pr58431.ll
index cacdc7ffcc04cf..88bab4af95d64f 100644
--- a/llvm/test/CodeGen/AArch64/pr58431.ll
+++ b/llvm/test/CodeGen/AArch64/pr58431.ll
@@ -8,6 +8,7 @@ define i32 @f(i64 %0) {
 ; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    udiv x10, x9, x8
 ; CHECK-NEXT:    msub x0, x10, x8, x9
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %2 = trunc i64 %0 to i32
   %3 = freeze i32 %2

diff --git a/llvm/test/CodeGen/AArch64/pr61111.ll b/llvm/test/CodeGen/AArch64/pr61111.ll
index fe8940e860df4a..d2c72921a92299 100644
--- a/llvm/test/CodeGen/AArch64/pr61111.ll
+++ b/llvm/test/CodeGen/AArch64/pr61111.ll
@@ -4,6 +4,7 @@
 define i62 @f(i1 %0) {
 ; CHECK-LABEL: f:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    sub x8, x8, #1
 ; CHECK-NEXT:    tst x8, #0x3fffffffffffffff

diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
index 993c3eb810b1e8..83dd240a6540fa 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
@@ -5,10 +5,11 @@ define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5,
 ; CHECK-LABEL: callee:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    add x8, sp, #24
 ; CHECK-NEXT:    mov x0, x5
+; CHECK-NEXT:    add x8, sp, #24
 ; CHECK-NEXT:    stp x6, x7, [sp, #32]
 ; CHECK-NEXT:    str x5, [sp, #24]
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    str x8, [sp, #8]
 ; CHECK-NEXT:    add sp, sp, #48
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/ptradd.ll b/llvm/test/CodeGen/AArch64/ptradd.ll
index 0e6b38c86323e2..427542377bfdae 100644
--- a/llvm/test/CodeGen/AArch64/ptradd.ll
+++ b/llvm/test/CodeGen/AArch64/ptradd.ll
@@ -74,11 +74,17 @@ entry:
 define <3 x ptr> @vector_gep_v3i32(<3 x ptr> %b, <3 x i32> %off) {
 ; CHECK-SD-LABEL: vector_gep_v3i32:
 ; CHECK-SD:       // %bb.0: // %entry
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    ext v4.16b, v3.16b, v3.16b, #8
-; CHECK-SD-NEXT:    saddw v0.2d, v0.2d, v3.2s
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    saddw v2.2d, v2.2d, v4.2s
+; CHECK-SD-NEXT:    saddw v0.2d, v0.2d, v3.2s
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vector_gep_v3i32:
@@ -159,8 +165,10 @@ define <3 x ptr> @vector_gep_v3i64(<3 x ptr> %b, <3 x i64> %off) {
 ; CHECK-GI-LABEL: vector_gep_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    fmov x8, d0
-; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x9, d5
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[0], x8
 ; CHECK-GI-NEXT:    fmov x8, d1
 ; CHECK-GI-NEXT:    mov v0.d[1], x8
@@ -244,22 +252,28 @@ entry:
 define <3 x ptr> @vector_gep_v3i64_base(ptr %b, <3 x i64> %off) {
 ; CHECK-SD-LABEL: vector_gep_v3i64_base:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    fmov d3, x0
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    dup v1.2d, x0
-; CHECK-SD-NEXT:    fmov d3, x0
 ; CHECK-SD-NEXT:    add d2, d3, d2
 ; CHECK-SD-NEXT:    add v0.2d, v1.2d, v0.2d
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vector_gep_v3i64_base:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    dup v1.2d, x0
-; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    add x8, x0, x8
-; CHECK-GI-NEXT:    add v0.2d, v1.2d, v0.2d
 ; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    add v0.2d, v1.2d, v0.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -329,6 +343,8 @@ define <3 x ptr> @vector_gep_v3i64_c10(ptr %b) {
 ; CHECK-SD-NEXT:    add v0.2d, v0.2d, v2.2d
 ; CHECK-SD-NEXT:    add d2, d3, d2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vector_gep_v3i64_c10:
@@ -419,6 +435,8 @@ define <3 x ptr> @vector_gep_v3i64_cm10(ptr %b) {
 ; CHECK-SD-NEXT:    add v0.2d, v0.2d, v2.2d
 ; CHECK-SD-NEXT:    add d2, d3, d2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: vector_gep_v3i64_cm10:

diff --git a/llvm/test/CodeGen/AArch64/qmovn.ll b/llvm/test/CodeGen/AArch64/qmovn.ll
index 04b9d6187a70df..2685ea9fb5d202 100644
--- a/llvm/test/CodeGen/AArch64/qmovn.ll
+++ b/llvm/test/CodeGen/AArch64/qmovn.ll
@@ -210,6 +210,7 @@ entry:
 define <16 x i8> @signed_minmax_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: signed_minmax_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -223,6 +224,7 @@ entry:
 define <8 x i16> @signed_minmax_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: signed_minmax_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -236,6 +238,7 @@ entry:
 define <4 x i32> @signed_minmax_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: signed_minmax_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -251,6 +254,7 @@ entry:
 define <16 x i8> @signed_maxmin_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: signed_maxmin_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -264,6 +268,7 @@ entry:
 define <8 x i16> @signed_maxmin_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: signed_maxmin_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -277,6 +282,7 @@ entry:
 define <4 x i32> @signed_maxmin_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: signed_maxmin_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtn2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -292,6 +298,7 @@ entry:
 define <16 x i8> @unsigned_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: unsigned_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqxtn2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -304,6 +311,7 @@ entry:
 define <8 x i16> @unsigned_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: unsigned_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqxtn2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -316,6 +324,7 @@ entry:
 define <4 x i32> @unsigned_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: unsigned_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    uqxtn2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -330,6 +339,7 @@ entry:
 define <16 x i8> @us_maxmin_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: us_maxmin_v8i16_to_v16i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -343,6 +353,7 @@ entry:
 define <8 x i16> @us_maxmin_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: us_maxmin_v4i32_to_v8i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -356,6 +367,7 @@ entry:
 define <4 x i32> @us_maxmin_v2i64_to_v4i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: us_maxmin_v2i64_to_v4i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:
@@ -371,6 +383,7 @@ entry:
 define <16 x i8> @sminsmax_range_unsigned_i16_to_i8(<8 x i8> %x, <8 x i16> %y) {
 ; CHECK-LABEL: sminsmax_range_unsigned_i16_to_i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.16b, v1.8h
 ; CHECK-NEXT:    ret
 entry:
@@ -384,6 +397,7 @@ entry:
 define <8 x i16> @sminsmax_range_unsigned_i32_to_i16(<4 x i16> %x, <4 x i32> %y) {
 ; CHECK-LABEL: sminsmax_range_unsigned_i32_to_i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.8h, v1.4s
 ; CHECK-NEXT:    ret
 entry:
@@ -397,6 +411,7 @@ entry:
 define <4 x i32> @sminsmax_range_unsigned_i64_to_i32(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: sminsmax_range_unsigned_i64_to_i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    sqxtun2 v0.4s, v1.2d
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/rcpc3.ll b/llvm/test/CodeGen/AArch64/rcpc3.ll
index 26e7b9325846d7..45770337c13208 100644
--- a/llvm/test/CodeGen/AArch64/rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/rcpc3.ll
@@ -77,6 +77,7 @@ define hidden <1 x i64> @test_ldap1_1xi64_lane0(ptr nocapture noundef readonly %
 ; RCPC3-LABEL: test_ldap1_1xi64_lane0:
 ; RCPC3:       // %bb.0:
 ; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
+; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; RCPC3-NEXT:    ret
 ;
 ; NO-RCPC3-LABEL: test_ldap1_1xi64_lane0:
@@ -94,6 +95,7 @@ define hidden nofpclass(nan inf) <1 x double> @test_ldap1_1xdouble_lane0(ptr noc
 ; RCPC3-LABEL: test_ldap1_1xdouble_lane0:
 ; RCPC3:       // %bb.0:
 ; RCPC3-NEXT:    ldap1 { v0.d }[0], [x0]
+; RCPC3-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; RCPC3-NEXT:    ret
 ;
 ; NO-RCPC3-LABEL: test_ldap1_1xdouble_lane0:
@@ -179,11 +181,13 @@ define hidden void @test_stl1_1xi64_lane0(ptr nocapture noundef writeonly %a, <1
 ;
 ; RCPC3-LABEL: test_stl1_1xi64_lane0:
 ; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
 ; RCPC3-NEXT:    ret
 ;
 ; NO-RCPC3-LABEL: test_stl1_1xi64_lane0:
 ; NO-RCPC3:       // %bb.0:
+; NO-RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; NO-RCPC3-NEXT:    fmov x8, d0
 ; NO-RCPC3-NEXT:    stlr x8, [x0]
 ; NO-RCPC3-NEXT:    ret
@@ -196,6 +200,7 @@ define hidden void @test_stl1_1xdouble_lane0(ptr nocapture noundef writeonly %a,
 ;
 ; RCPC3-LABEL: test_stl1_1xdouble_lane0:
 ; RCPC3:       // %bb.0:
+; RCPC3-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; RCPC3-NEXT:    stl1 { v0.d }[0], [x0]
 ; RCPC3-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll
index ebfcfd4ea0a398..8ca521327c2e31 100644
--- a/llvm/test/CodeGen/AArch64/reduce-and.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -29,6 +29,7 @@ define i1 @test_redand_v2i1(<2 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redand_v2i1:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    and w8, w9, w8
@@ -50,6 +51,7 @@ define i1 @test_redand_v4i1(<4 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redand_v4i1:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -75,6 +77,7 @@ define i1 @test_redand_v8i1(<8 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redand_v8i1:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -196,11 +199,13 @@ define <16 x i1> @test_redand_ins_v16i1(<16 x i1> %a) {
 define i8 @test_redand_v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: test_redand_v1i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: test_redand_v1i8:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w0, v0.b[0]
 ; GISEL-NEXT:    ret
   %and_result = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
@@ -240,6 +245,7 @@ define i8 @test_redand_v4i8(<4 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redand_v4i8:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -264,6 +270,7 @@ define i8 @test_redand_v8i8(<8 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redand_v8i8:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -369,6 +376,7 @@ define i16 @test_redand_v4i16(<4 x i16> %a) {
 ;
 ; GISEL-LABEL: test_redand_v4i16:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -447,6 +455,7 @@ define i32 @test_redand_v2i32(<2 x i32> %a) {
 ;
 ; GISEL-LABEL: test_redand_v2i32:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    and w0, w9, w8

diff  --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll
index 2c2577f8916371..aac31ce8b71b75 100644
--- a/llvm/test/CodeGen/AArch64/reduce-or.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-or.ll
@@ -29,6 +29,7 @@ define i1 @test_redor_v2i1(<2 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redor_v2i1:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    orr w8, w9, w8
@@ -50,6 +51,7 @@ define i1 @test_redor_v4i1(<4 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redor_v4i1:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -75,6 +77,7 @@ define i1 @test_redor_v8i1(<8 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redor_v8i1:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -196,11 +199,13 @@ define <16 x i1> @test_redor_ins_v16i1(<16 x i1> %a) {
 define i8 @test_redor_v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: test_redor_v1i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: test_redor_v1i8:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w0, v0.b[0]
 ; GISEL-NEXT:    ret
   %or_result = call i8 @llvm.vector.reduce.or.v1i8(<1 x i8> %a)
@@ -242,6 +247,7 @@ define i8 @test_redor_v4i8(<4 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redor_v4i8:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -266,6 +272,7 @@ define i8 @test_redor_v8i8(<8 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redor_v8i8:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -371,6 +378,7 @@ define i16 @test_redor_v4i16(<4 x i16> %a) {
 ;
 ; GISEL-LABEL: test_redor_v4i16:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -449,6 +457,7 @@ define i32 @test_redor_v2i32(<2 x i32> %a) {
 ;
 ; GISEL-LABEL: test_redor_v2i32:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    orr w0, w9, w8

diff  --git a/llvm/test/CodeGen/AArch64/reduce-shuffle.ll b/llvm/test/CodeGen/AArch64/reduce-shuffle.ll
index 05867346372d38..325ab444205bf9 100644
--- a/llvm/test/CodeGen/AArch64/reduce-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-shuffle.ll
@@ -4,6 +4,8 @@
 define i32 @v1(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocapture noundef readonly %p2, i32 noundef %i2) {
 ; CHECK-LABEL: v1:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w3 killed $w3 def $x3
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    sxtw x9, w3
 ; CHECK-NEXT:    ldr d0, [x0]
@@ -225,7 +227,9 @@ entry:
 define i32 @v2(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocapture noundef readonly %p2, i32 noundef %i2) {
 ; CHECK-LABEL: v2:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
+; CHECK-NEXT:    // kill: def $w3 killed $w3 def $x3
 ; CHECK-NEXT:    sxtw x9, w3
 ; CHECK-NEXT:    ldr d4, [x0]
 ; CHECK-NEXT:    ldr d5, [x2]
@@ -439,6 +443,8 @@ entry:
 define i32 @v3(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocapture noundef readonly %p2, i32 noundef %i2) {
 ; CHECK-LABEL: v3:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w3 killed $w3 def $x3
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    sxtw x9, w3
 ; CHECK-NEXT:    ldr d0, [x0]

diff  --git a/llvm/test/CodeGen/AArch64/reduce-xor.ll b/llvm/test/CodeGen/AArch64/reduce-xor.ll
index 7e55f07cc6b7af..9a00172f947636 100644
--- a/llvm/test/CodeGen/AArch64/reduce-xor.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-xor.ll
@@ -26,6 +26,7 @@ define i1 @test_redxor_v2i1(<2 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v2i1:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    eor w8, w9, w8
@@ -45,6 +46,7 @@ define i1 @test_redxor_v4i1(<4 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v4i1:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -68,6 +70,7 @@ define i1 @test_redxor_v8i1(<8 x i1> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v8i1:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -185,11 +188,13 @@ define <16 x i1> @test_redxor_ins_v16i1(<16 x i1> %a) {
 define i8 @test_redxor_v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: test_redxor_v1i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
 ;
 ; GISEL-LABEL: test_redxor_v1i8:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w0, v0.b[0]
 ; GISEL-NEXT:    ret
   %xor_result = call i8 @llvm.vector.reduce.xor.v1i8(<1 x i8> %a)
@@ -231,6 +236,7 @@ define i8 @test_redxor_v4i8(<4 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v4i8:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -255,6 +261,7 @@ define i8 @test_redxor_v8i8(<8 x i8> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v8i8:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.b[0]
 ; GISEL-NEXT:    umov w9, v0.b[1]
 ; GISEL-NEXT:    umov w10, v0.b[2]
@@ -360,6 +367,7 @@ define i16 @test_redxor_v4i16(<4 x i16> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v4i16:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    umov w8, v0.h[0]
 ; GISEL-NEXT:    umov w9, v0.h[1]
 ; GISEL-NEXT:    umov w10, v0.h[2]
@@ -438,6 +446,7 @@ define i32 @test_redxor_v2i32(<2 x i32> %a) {
 ;
 ; GISEL-LABEL: test_redxor_v2i32:
 ; GISEL:       // %bb.0:
+; GISEL-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; GISEL-NEXT:    mov w8, v0.s[1]
 ; GISEL-NEXT:    fmov w9, s0
 ; GISEL-NEXT:    eor w0, w9, w8

diff  --git a/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir b/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
index 96404961cfc57f..9bd3ad9165cee6 100644
--- a/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
+++ b/llvm/test/CodeGen/AArch64/regalloc-last-chance-recolor-with-split.mir
@@ -275,7 +275,7 @@ body:             |
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   dead $w1 = MOVi32imm 526, implicit-def $x1
   ; CHECK-NEXT:   dead $w2 = MOVi32imm 2, implicit-def $x2
-  ; CHECK-NEXT:   renamable $w19 = MOVi32imm 2
+  ; CHECK-NEXT:   renamable $w19 = MOVi32imm 2, implicit-def $x19
   ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 4, @bar, undef $x0, $x1, $x2, undef $x3, 2, 0, 2, 4, 2, 39, 2, 0, 2, 1, 2, 0, 2, 42, 2, 2, 2, 14, 2, 0, 2, 3, 2, 400, 2, 3, 2, 400, 2, 0, 1, 8, %stack.0, 0, 2, 7, 2, 0, 2, 3, 2, 95, 2, 7, 2, 0, 2, 3, 2, -11, 2, 3, 2, -8280, 2, 3, 2, 45, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 5, 2, 7, 2, 0, 2, 3, 2, 1, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_aarch64_aapcs, implicit-def $sp, implicit-def $x0, implicit-def dead early-clobber $lr :: (load store (s64) on %stack.0)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $x20 = COPY $x0
@@ -305,7 +305,7 @@ body:             |
   ; CHECK-NEXT:   renamable $x20 = LDRXui undef renamable $x8, 0 :: (load unordered (s64) from `ptr addrspace(1) undef`, addrspace 1)
   ; CHECK-NEXT:   renamable $w21 = MOVi32imm -8280
   ; CHECK-NEXT:   renamable $w23 = MOVi32imm -6
-  ; CHECK-NEXT:   renamable $w25 = MOVi32imm 3
+  ; CHECK-NEXT:   renamable $w25 = MOVi32imm 3, implicit-def $x25
   ; CHECK-NEXT:   renamable $w24 = MOVi32imm 2143289344
   ; CHECK-NEXT:   renamable $x22 = IMPLICIT_DEF
   ; CHECK-NEXT:   dead renamable $x8 = IMPLICIT_DEF
@@ -316,21 +316,21 @@ body:             |
   ; CHECK-NEXT:   KILL killed renamable $x8
   ; CHECK-NEXT:   renamable $x8 = IMPLICIT_DEF
   ; CHECK-NEXT:   KILL killed renamable $x8
-  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2
+  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2, implicit-def $x10
   ; CHECK-NEXT:   B %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2.bb23:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT:   liveins: $x19:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x19
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $w20 = MOVi32imm 95
-  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 39, 2, 0, 2, 1, 2, 0, 2, 117, 2, 2, 2, 14, 2, 0, 2, 3, 2, 3, 2, 3, 2, 109, 2, 0, 1, 8, %stack.0, 0, 2, 7, 2, 0, 2, 3, killed renamable $w20, 2, 3, killed renamable $w19, 2, 3, 2, 3, 2, 3, 2, -8280, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 5, 2, 7, 2, 0, 2, 3, 2, 1, 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 2, 1, 8, %stack.0, 0, 2, 4278124286, 2, 0, 2, 2, 0, 0, 1, 1, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr :: (load store (s64) on %stack.0)
+  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 39, 2, 0, 2, 1, 2, 0, 2, 117, 2, 2, 2, 14, 2, 0, 2, 3, 2, 3, 2, 3, 2, 109, 2, 0, 1, 8, %stack.0, 0, 2, 7, 2, 0, 2, 3, killed renamable $w20, 2, 3, renamable $w19, 2, 3, 2, 3, 2, 3, 2, -8280, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 5, 2, 7, 2, 0, 2, 3, 2, 1, 2, 0, 2, 4278124286, 2, 7, 2, 0, 2, 2, 1, 8, %stack.0, 0, 2, 4278124286, 2, 0, 2, 2, 0, 0, 1, 1, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr, implicit killed $x19 :: (load store (s64) on %stack.0)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.bb27:
   ; CHECK-NEXT:   successors: %bb.4(0x80000000), %bb.13(0x00000000)
-  ; CHECK-NEXT:   liveins: $w21, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $w21, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $fp = nuw nsw ADDXri renamable $x25, 1, 0
   ; CHECK-NEXT:   CBNZW $wzr, %bb.13
@@ -338,26 +338,26 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4.bb41:
   ; CHECK-NEXT:   successors: %bb.5(0x7ffff777), %bb.6(0x00000889)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w21, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w21, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CBZW $wzr, %bb.6
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.5:
   ; CHECK-NEXT:   successors: %bb.7(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   B %bb.7
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.6.bb42:
   ; CHECK-NEXT:   successors: %bb.7(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w21, $w23, $w24, $x20:0x0000000000000040, $x25:0x00000000000000C0, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w21, $w23, $w24, $x20, $x25, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $x19 = LDRXui %stack.0, 0 :: (load (s64) from %stack.0)
-  ; CHECK-NEXT:   renamable $w22 = MOVi32imm 2
+  ; CHECK-NEXT:   renamable $w22 = MOVi32imm 2, implicit-def $x22
   ; CHECK-NEXT:   renamable $w26 = MOVi32imm 95
-  ; CHECK-NEXT:   renamable $x27, dead renamable $x19 = STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 35, 2, 0, 2, 1, 2, 0, 2, 125, 2, 0, 2, 14, 2, 0, 2, 0, killed renamable $x19, 2, 7, 2, 0, 2, 3, killed renamable $w26, 2, 3, killed renamable $w22, 2, 3, 2, 4278124286, 2, 3, killed renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 2, 2, 7, 2, 0, 2, 3, 2, 4278124286, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 3, killed renamable $x27(tied-def 0), 1, 8, %stack.1, 0, renamable $x19(tied-def 1), 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr :: (load store (s64) on %stack.1)
-  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2
+  ; CHECK-NEXT:   renamable $x27, dead renamable $x19 = STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 35, 2, 0, 2, 1, 2, 0, 2, 125, 2, 0, 2, 14, 2, 0, 2, 0, killed renamable $x19, 2, 7, 2, 0, 2, 3, killed renamable $w26, 2, 3, renamable $w22, 2, 3, 2, 4278124286, 2, 3, killed renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 2, 2, 7, 2, 0, 2, 3, 2, 4278124286, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 3, killed renamable $x27(tied-def 0), 1, 8, %stack.1, 0, renamable $x19(tied-def 1), 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr, implicit killed $x22 :: (load store (s64) on %stack.1)
+  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2, implicit-def $x10
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $x19 = COPY $xzr
   ; CHECK-NEXT:   renamable $x8 = LDRXui %stack.1, 0 :: (load (s64) from %stack.1)
@@ -369,18 +369,18 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.7.bb48:
   ; CHECK-NEXT:   successors: %bb.8(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.8.bb79:
   ; CHECK-NEXT:   successors: %bb.9(0x04000000), %bb.8(0x7c000000)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CBNZW $wzr, %bb.8
   ; CHECK-NEXT:   B %bb.9
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.9.bb81:
   ; CHECK-NEXT:   successors: %bb.11(0x78787f1d), %bb.10(0x078780e3)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   STRXui $xzr, renamable $x22, 0 :: (store unordered (s64), addrspace 1)
   ; CHECK-NEXT:   CBNZW $wzr, %bb.11
@@ -388,28 +388,28 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.10.bb82:
   ; CHECK-NEXT:   successors: %bb.11(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   BL @blam.1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
-  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2
+  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2, implicit-def $x10
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.11.bb83:
   ; CHECK-NEXT:   successors: %bb.12(0x7ffff777), %bb.17(0x00000889)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CBZW $wzr, %bb.17
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.12:
   ; CHECK-NEXT:   successors: %bb.18(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   B %bb.18
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.13.bb85:
   ; CHECK-NEXT:   successors: %bb.14(0x40000000), %bb.15(0x40000000)
-  ; CHECK-NEXT:   liveins: $fp:0x0000000000000040, $w21, $x10:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $fp, $w21, $x10
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   CBNZW $wzr, %bb.15
   ; CHECK-NEXT:   B %bb.14
@@ -424,12 +424,12 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.15.bb90:
   ; CHECK-NEXT:   successors:
-  ; CHECK-NEXT:   liveins: $fp:0x0000000000000040, $w21, $x10:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $fp, $w21, $x10
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   $w0 = MOVi32imm 12
   ; CHECK-NEXT:   renamable $w8 = MOVi32imm 95
-  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 1, @blam, $w0, 2, 129, 2, 0, 2, 43, 2, 0, 2, 1, 2, 0, 2, 201, 2, 4, 2, 14, 2, 0, 2, 0, 2, 4278124286, 2, 3, killed renamable $w29, 2, 0, 2, 4278124286, 2, 3, renamable $w29, 2, 0, 1, 8, %stack.0, 0, 2, 7, 2, 0, 2, 3, killed renamable $w8, 2, 3, killed renamable $w10, 2, 3, 2, 4278124286, 2, 3, killed renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 2, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 3, 2, 4278124286, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_aarch64_allregs, implicit-def $sp, implicit-def dead early-clobber $lr :: (load store (s64) on %stack.0), (load store (s64) on %stack.1)
+  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 1, @blam, $w0, 2, 129, 2, 0, 2, 43, 2, 0, 2, 1, 2, 0, 2, 201, 2, 4, 2, 14, 2, 0, 2, 0, 2, 4278124286, 2, 3, renamable $w29, 2, 0, 2, 4278124286, 2, 3, renamable $w29, 2, 0, 1, 8, %stack.0, 0, 2, 7, 2, 0, 2, 3, killed renamable $w8, 2, 3, renamable $w10, 2, 3, 2, 4278124286, 2, 3, killed renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 2, 2, 7, 2, 0, 2, 7, 2, 0, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 3, 2, 4278124286, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 2, 0, 2, 3, 0, 0, 1, 1, 2, 2, csr_aarch64_allregs, implicit-def $sp, implicit-def dead early-clobber $lr, implicit killed $x10, implicit killed $fp :: (load store (s64) on %stack.0), (load store (s64) on %stack.1)
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.16.bb94:
@@ -442,14 +442,14 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.17.bb96:
   ; CHECK-NEXT:   successors: %bb.18(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x20:0x0000000000000040, $x25:0x00000000000000C0, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x20, $x25, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $x19 = LDRXui %stack.0, 0 :: (load (s64) from %stack.0)
-  ; CHECK-NEXT:   renamable $w21 = MOVi32imm 2
+  ; CHECK-NEXT:   renamable $w21 = MOVi32imm 2, implicit-def $x21
   ; CHECK-NEXT:   renamable $w22 = MOVi32imm 95
-  ; CHECK-NEXT:   renamable $x27, dead renamable $x19 = STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 35, 2, 0, 2, 1, 2, 0, 2, 250, 2, 0, 2, 14, 2, 0, 2, 0, killed renamable $x19, 2, 7, 2, 0, 2, 3, killed renamable $w22, 2, 3, killed renamable $w21, 2, 3, 2, 4278124286, 2, 3, renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 0, 2, 7, 2, 0, 2, 3, 2, 4278124286, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, killed renamable $x27(tied-def 0), 1, 8, %stack.1, 0, renamable $x19(tied-def 1), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr :: (load store (s64) on %stack.1), (load store (s64) on %stack.2)
-  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2
+  ; CHECK-NEXT:   renamable $x27, dead renamable $x19 = STATEPOINT 2882400000, 0, 0, @wombat, 2, 0, 2, 0, 2, 35, 2, 0, 2, 1, 2, 0, 2, 250, 2, 0, 2, 14, 2, 0, 2, 0, killed renamable $x19, 2, 7, 2, 0, 2, 3, killed renamable $w22, 2, 3, renamable $w21, 2, 3, 2, 4278124286, 2, 3, renamable $w21, 2, 7, 2, 0, 2, 3, 2, 230, 2, 7, 2, 0, 2, 4, 2, 0, 2, 7, 2, 0, 2, 3, 2, 4278124286, 2, 0, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, killed renamable $x27(tied-def 0), 1, 8, %stack.1, 0, renamable $x19(tied-def 1), 2, 0, 2, 4, 0, 0, 1, 1, 2, 2, 3, 3, csr_aarch64_aapcs, implicit-def $sp, implicit-def dead early-clobber $lr, implicit killed $x21 :: (load store (s64) on %stack.1), (load store (s64) on %stack.2)
+  ; CHECK-NEXT:   renamable $w10 = MOVi32imm 2, implicit-def $x10
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   renamable $x8 = LDRXui %stack.1, 0 :: (load (s64) from %stack.1)
   ; CHECK-NEXT:   dead renamable $x8 = nuw ADDXri killed renamable $x8, 24, 0
@@ -460,7 +460,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.18.bb105:
   ; CHECK-NEXT:   successors: %bb.20(0x00000000), %bb.19(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   STRWui renamable $w20, renamable $x19, 0 :: (store unordered (s32), addrspace 1)
   ; CHECK-NEXT:   STRWui renamable $w24, renamable $x26, 0 :: (store unordered (s32), align 8, addrspace 1)
@@ -469,7 +469,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.19.bb115:
   ; CHECK-NEXT:   successors: %bb.16(0x00000000), %bb.3(0x80000000)
-  ; CHECK-NEXT:   liveins: $fp:0x00000000000000C0, $w23, $w24, $x10:0x0000000000000040, $x19, $x20:0x0000000000000040, $x22, $x25:0x00000000000000C0, $x26, $x27
+  ; CHECK-NEXT:   liveins: $fp, $w23, $w24, $x10, $x19, $x20, $x22, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $x8 = COPY $xzr
   ; CHECK-NEXT:   renamable $w9 = LDRWui renamable $x8, 0 :: (load unordered (s32) from `ptr addrspace(1) null`, addrspace 1)
@@ -483,11 +483,11 @@ body:             |
   ; CHECK-NEXT:   B %bb.3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.20.bb120:
-  ; CHECK-NEXT:   liveins: $x10:0x0000000000000040
+  ; CHECK-NEXT:   liveins: $x10
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-NEXT:   $w0 = MOVi32imm 10
-  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 1, @blam, $w0, 2, 129, 2, 0, 2, 39, 2, 0, 2, 1, 2, 0, 2, 272, 2, 2, 2, 14, 2, 0, 2, 0, 2, 0, 2, 3, killed renamable $w10, 2, 0, 2, 0, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 0, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_aarch64_allregs, implicit-def $sp, implicit-def dead early-clobber $lr
+  ; CHECK-NEXT:   STATEPOINT 2882400000, 0, 1, @blam, $w0, 2, 129, 2, 0, 2, 39, 2, 0, 2, 1, 2, 0, 2, 272, 2, 2, 2, 14, 2, 0, 2, 0, 2, 0, 2, 3, renamable $w10, 2, 0, 2, 0, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 7, 2, 0, 2, 7, 2, 4278124286, 2, 0, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_aarch64_allregs, implicit-def $sp, implicit-def dead early-clobber $lr, implicit killed $x10
   ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
   bb.0.bb:
     successors: %bb.1(0x80000000), %bb.2(0x00000000)

diff  --git a/llvm/test/CodeGen/AArch64/rem.ll b/llvm/test/CodeGen/AArch64/rem.ll
index d03ea0d62bf2c8..d807635f5d87d1 100644
--- a/llvm/test/CodeGen/AArch64/rem.ll
+++ b/llvm/test/CodeGen/AArch64/rem.ll
@@ -175,6 +175,7 @@ define <2 x i8> @sv2i8(<2 x i8> %d, <2 x i8> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv2i8:
@@ -192,6 +193,7 @@ define <2 x i8> @sv2i8(<2 x i8> %d, <2 x i8> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = srem <2 x i8> %d, %e
@@ -262,6 +264,7 @@ define <4 x i8> @sv4i8(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
 ; CHECK-SD-NEXT:    msub w8, w9, w17, w18
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv4i8:
@@ -299,6 +302,8 @@ entry:
 define <8 x i8> @sv8i8(<8 x i8> %d, <8 x i8> %e) {
 ; CHECK-SD-LABEL: sv8i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    smov w11, v1.b[0]
 ; CHECK-SD-NEXT:    smov w12, v0.b[0]
 ; CHECK-SD-NEXT:    smov w8, v1.b[1]
@@ -1090,6 +1095,7 @@ define <2 x i8> @uv2i8(<2 x i8> %d, <2 x i8> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv2i8:
@@ -1106,6 +1112,7 @@ define <2 x i8> @uv2i8(<2 x i8> %d, <2 x i8> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = urem <2 x i8> %d, %e
@@ -1152,6 +1159,8 @@ entry:
 define <4 x i8> @uv4i8(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-LABEL: uv4i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
 ; CHECK-SD-NEXT:    bic v1.4h, #255, lsl #8
 ; CHECK-SD-NEXT:    umov w11, v1.h[0]
@@ -1174,6 +1183,7 @@ define <4 x i8> @uv4i8(<4 x i8> %d, <4 x i8> %e) {
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
 ; CHECK-SD-NEXT:    msub w8, w9, w17, w18
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv4i8:
@@ -1210,6 +1220,8 @@ entry:
 define <8 x i8> @uv8i8(<8 x i8> %d, <8 x i8> %e) {
 ; CHECK-SD-LABEL: uv8i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w11, v1.b[0]
 ; CHECK-SD-NEXT:    umov w12, v0.b[0]
 ; CHECK-SD-NEXT:    umov w8, v1.b[1]
@@ -2002,6 +2014,7 @@ define <2 x i16> @sv2i16(<2 x i16> %d, <2 x i16> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv2i16:
@@ -2019,6 +2032,7 @@ define <2 x i16> @sv2i16(<2 x i16> %d, <2 x i16> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = srem <2 x i16> %d, %e
@@ -2028,6 +2042,8 @@ entry:
 define <3 x i16> @sv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-SD-LABEL: sv3i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    smov w11, v1.h[0]
 ; CHECK-SD-NEXT:    smov w12, v0.h[0]
 ; CHECK-SD-NEXT:    smov w8, v1.h[1]
@@ -2043,10 +2059,13 @@ define <3 x i16> @sv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-SD-NEXT:    mov v0.h[1], w8
 ; CHECK-SD-NEXT:    msub w8, w16, w14, w15
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv3i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    smov w8, v0.h[0]
 ; CHECK-GI-NEXT:    smov w9, v1.h[0]
 ; CHECK-GI-NEXT:    smov w11, v0.h[1]
@@ -2062,6 +2081,7 @@ define <3 x i16> @sv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    msub w8, w16, w15, w14
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = srem <3 x i16> %d, %e
@@ -2071,6 +2091,8 @@ entry:
 define <4 x i16> @sv4i16(<4 x i16> %d, <4 x i16> %e) {
 ; CHECK-SD-LABEL: sv4i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    smov w11, v1.h[0]
 ; CHECK-SD-NEXT:    smov w12, v0.h[0]
 ; CHECK-SD-NEXT:    smov w8, v1.h[1]
@@ -2091,6 +2113,7 @@ define <4 x i16> @sv4i16(<4 x i16> %d, <4 x i16> %e) {
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
 ; CHECK-SD-NEXT:    msub w8, w9, w17, w18
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv4i16:
@@ -2455,6 +2478,7 @@ define <2 x i16> @uv2i16(<2 x i16> %d, <2 x i16> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv2i16:
@@ -2471,6 +2495,7 @@ define <2 x i16> @uv2i16(<2 x i16> %d, <2 x i16> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = urem <2 x i16> %d, %e
@@ -2480,6 +2505,8 @@ entry:
 define <3 x i16> @uv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-SD-LABEL: uv3i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w11, v1.h[0]
 ; CHECK-SD-NEXT:    umov w12, v0.h[0]
 ; CHECK-SD-NEXT:    umov w8, v1.h[1]
@@ -2500,10 +2527,13 @@ define <3 x i16> @uv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-SD-NEXT:    msub w10, w15, w12, w13
 ; CHECK-SD-NEXT:    sxth w8, w10
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv3i16:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v1.h[0]
 ; CHECK-GI-NEXT:    umov w11, v0.h[1]
@@ -2519,6 +2549,7 @@ define <3 x i16> @uv3i16(<3 x i16> %d, <3 x i16> %e) {
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    msub w8, w16, w15, w14
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = urem <3 x i16> %d, %e
@@ -2528,6 +2559,8 @@ entry:
 define <4 x i16> @uv4i16(<4 x i16> %d, <4 x i16> %e) {
 ; CHECK-SD-LABEL: uv4i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    umov w11, v1.h[0]
 ; CHECK-SD-NEXT:    umov w12, v0.h[0]
 ; CHECK-SD-NEXT:    umov w8, v1.h[1]
@@ -2548,6 +2581,7 @@ define <4 x i16> @uv4i16(<4 x i16> %d, <4 x i16> %e) {
 ; CHECK-SD-NEXT:    mov v0.h[2], w8
 ; CHECK-SD-NEXT:    msub w8, w9, w17, w18
 ; CHECK-SD-NEXT:    mov v0.h[3], w8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv4i16:
@@ -2899,6 +2933,8 @@ entry:
 define <2 x i32> @sv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-SD-LABEL: sv2i32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w8, s1
 ; CHECK-SD-NEXT:    fmov w9, s0
 ; CHECK-SD-NEXT:    mov w11, v1.s[1]
@@ -2909,10 +2945,13 @@ define <2 x i32> @sv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sv2i32:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    mov w10, v1.s[1]
@@ -2922,6 +2961,7 @@ define <2 x i32> @sv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = srem <2 x i32> %d, %e
@@ -3124,6 +3164,8 @@ entry:
 define <2 x i32> @uv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-SD-LABEL: uv2i32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov w8, s1
 ; CHECK-SD-NEXT:    fmov w9, s0
 ; CHECK-SD-NEXT:    mov w11, v1.s[1]
@@ -3134,10 +3176,13 @@ define <2 x i32> @uv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-SD-NEXT:    fmov s0, w8
 ; CHECK-SD-NEXT:    msub w9, w13, w11, w12
 ; CHECK-SD-NEXT:    mov v0.s[1], w9
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: uv2i32:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    fmov w8, s0
 ; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    mov w10, v1.s[1]
@@ -3147,6 +3192,7 @@ define <2 x i32> @uv2i32(<2 x i32> %d, <2 x i32> %e) {
 ; CHECK-GI-NEXT:    mov v2.s[0], w8
 ; CHECK-GI-NEXT:    mov v2.s[1], w9
 ; CHECK-GI-NEXT:    mls v0.2s, v2.2s, v1.2s
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = urem <2 x i32> %d, %e
@@ -3387,6 +3433,12 @@ entry:
 define <3 x i64> @sv3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-SD-LABEL: sv3i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x8, d3
 ; CHECK-SD-NEXT:    fmov x9, d0
 ; CHECK-SD-NEXT:    fmov x11, d4
@@ -3406,8 +3458,12 @@ define <3 x i64> @sv3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: sv3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-GI-NEXT:    fmov x8, d0
 ; CHECK-GI-NEXT:    fmov x9, d3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x10, d4
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
@@ -3430,6 +3486,7 @@ define <3 x i64> @sv3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-GI-NEXT:    msub x8, x12, x9, x8
 ; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v2.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    ret
 entry:
@@ -3541,6 +3598,12 @@ entry:
 define <3 x i64> @uv3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-SD-LABEL: uv3i64:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
+; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
+; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmov x8, d3
 ; CHECK-SD-NEXT:    fmov x9, d0
 ; CHECK-SD-NEXT:    fmov x11, d4
@@ -3560,8 +3623,12 @@ define <3 x i64> @uv3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: uv3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-GI-NEXT:    fmov x8, d0
 ; CHECK-GI-NEXT:    fmov x9, d3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x10, d4
 ; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
@@ -3584,6 +3651,7 @@ define <3 x i64> @uv3i64(<3 x i64> %d, <3 x i64> %e) {
 ; CHECK-GI-NEXT:    msub x8, x12, x9, x8
 ; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v2.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    fmov d2, x8
 ; CHECK-GI-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll
index d4060f9083e97e..ec7548e1e65410 100644
--- a/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/round-fptosi-sat-scalar.ll
@@ -7,6 +7,7 @@
 define i32 @testmswbf(bfloat %a) {
 ; CHECK-LABEL: testmswbf:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -30,6 +31,7 @@ entry:
 define i64 @testmsxbf(bfloat %a) {
 ; CHECK-LABEL: testmsxbf:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -139,6 +141,7 @@ entry:
 define i32 @testpswbf(bfloat %a) {
 ; CHECK-LABEL: testpswbf:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16
@@ -162,6 +165,7 @@ entry:
 define i64 @testpsxbf(bfloat %a) {
 ; CHECK-LABEL: testpsxbf:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $s0
 ; CHECK-NEXT:    fmov w9, s0
 ; CHECK-NEXT:    mov w8, #32767 // =0x7fff
 ; CHECK-NEXT:    lsl w9, w9, #16

diff  --git a/llvm/test/CodeGen/AArch64/select-constant-xor.ll b/llvm/test/CodeGen/AArch64/select-constant-xor.ll
index 89409d6a32097a..3adf48e84b44c1 100644
--- a/llvm/test/CodeGen/AArch64/select-constant-xor.ll
+++ b/llvm/test/CodeGen/AArch64/select-constant-xor.ll
@@ -38,6 +38,7 @@ define i32 @selecti64i32(i64 %a) {
 define i64 @selecti32i64(i32 %a) {
 ; CHECK-LABEL: selecti32i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #31, #1
 ; CHECK-NEXT:    eor x0, x8, #0x7fffffff
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/seqpairspill.mir b/llvm/test/CodeGen/AArch64/seqpairspill.mir
index b29ab7727f65d6..0e6c94c44712c7 100644
--- a/llvm/test/CodeGen/AArch64/seqpairspill.mir
+++ b/llvm/test/CodeGen/AArch64/seqpairspill.mir
@@ -7,11 +7,11 @@ body: |
   bb.0:
     ; Check the spill/reload sequence for the %0 register
     ; CHECK: renamable $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALX
-    ; CHECK-NEXT: STPXi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s128) into %stack.0, align 8)
+    ; CHECK-NEXT: STPXi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s128) into %stack.0, align 8)
     ; CHECK: INLINEASM
-    ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0 :: (load (s128) from %stack.0, align 8)
+    ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s128) from %stack.0, align 8)
     ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]]
-    ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]]
+    ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]]
     %0 : xseqpairsclass = IMPLICIT_DEF
     %1 : xseqpairsclass = IMPLICIT_DEF
     %2 : gpr64common = IMPLICIT_DEF
@@ -27,11 +27,11 @@ body: |
   bb.0:
     ; Check the spill/reload sequence for the %0 register
     ; CHECK: $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALW
-    ; CHECK-NEXT: STPWi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s64) into %stack.0, align 4)
+    ; CHECK-NEXT: STPWi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s64) into %stack.0, align 4)
     ; CHECK: INLINEASM
-    ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0 :: (load (s64) from %stack.0, align 4)
+    ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s64) from %stack.0, align 4)
     ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]]
-    ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]]
+    ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]]
     %0 : wseqpairsclass = IMPLICIT_DEF
     %1 : wseqpairsclass = IMPLICIT_DEF
     %2 : gpr64common = IMPLICIT_DEF

diff  --git a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
index 84fb127239bf85..4dd2a896cd81cb 100644
--- a/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
+++ b/llvm/test/CodeGen/AArch64/setcc_knownbits.ll
@@ -61,6 +61,7 @@ define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) {
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    clz x8, x0
 ; CHECK-SD-NEXT:    lsr x0, x8, #6
+; CHECK-SD-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
@@ -89,6 +90,7 @@ define i32 @PR17487(i1 %tobool) {
 ;
 ; CHECK-GI-LABEL: PR17487:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-GI-NEXT:    mov v0.d[1], x0
 ; CHECK-GI-NEXT:    adrp x8, .LCPI3_0
 ; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]

diff  --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index d7f02e667bb6ef..53fbb351954fcf 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -25,6 +25,7 @@ entry:
 define i64 @sext_i8_to_i64(i8 %a) {
 ; CHECK-LABEL: sext_i8_to_i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtb x0, w0
 ; CHECK-NEXT:    ret
 entry:
@@ -55,6 +56,7 @@ entry:
 define i64 @sext_i16_to_i64(i16 %a) {
 ; CHECK-LABEL: sext_i16_to_i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxth x0, w0
 ; CHECK-NEXT:    ret
 entry:
@@ -65,6 +67,7 @@ entry:
 define i64 @sext_i32_to_i64(i32 %a) {
 ; CHECK-LABEL: sext_i32_to_i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sxtw x0, w0
 ; CHECK-NEXT:    ret
 entry:
@@ -95,6 +98,7 @@ entry:
 define i64 @sext_i10_to_i64(i10 %a) {
 ; CHECK-LABEL: sext_i10_to_i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x0, x0, #0, #10
 ; CHECK-NEXT:    ret
 entry:
@@ -224,6 +228,7 @@ define <3 x i16> @sext_v3i8_v3i16(<3 x i8> %a) {
 ; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <3 x i8> %a to <3 x i16>
@@ -266,12 +271,18 @@ define <3 x i64> @sext_v3i8_v3i64(<3 x i8> %a) {
 ; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    sshr v2.2d, v2.2d, #56
 ; CHECK-SD-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    sshr v0.2d, v0.2d, #56
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_v3i8_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-GI-NEXT:    sxtb x8, w0
 ; CHECK-GI-NEXT:    sxtb x9, w1
 ; CHECK-GI-NEXT:    sxtb x10, w2
@@ -292,6 +303,7 @@ define <3 x i32> @sext_v3i16_v3i32(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: sext_v3i16_v3i32:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    smov w8, v0.h[0]
 ; CHECK-GI-NEXT:    smov w9, v0.h[1]
 ; CHECK-GI-NEXT:    mov v1.s[0], w8
@@ -311,11 +323,15 @@ define <3 x i64> @sext_v3i16_v3i64(<3 x i16> %a) {
 ; CHECK-SD-NEXT:    sshll v2.4s, v0.4h, #0
 ; CHECK-SD-NEXT:    sshll v0.2d, v2.2s, #0
 ; CHECK-SD-NEXT:    sshll2 v2.2d, v2.4s, #0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_v3i16_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    smov x8, v0.h[0]
 ; CHECK-GI-NEXT:    smov x9, v0.h[1]
 ; CHECK-GI-NEXT:    smov x10, v0.h[2]
@@ -333,8 +349,10 @@ define <3 x i64> @sext_v3i32_v3i64(<3 x i32> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    sshll v3.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    sshll2 v2.2d, v0.4s, #0
-; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_v3i32_v3i64:
@@ -372,6 +390,7 @@ define <3 x i16> @sext_v3i10_v3i16(<3 x i10> %a) {
 ; CHECK-GI-NEXT:    sbfx w8, w8, #6, #10
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <3 x i10> %a to <3 x i16>
@@ -414,12 +433,18 @@ define <3 x i64> @sext_v3i10_v3i64(<3 x i10> %a) {
 ; CHECK-SD-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    sshr v2.2d, v2.2d, #54
 ; CHECK-SD-NEXT:    shl v0.2d, v0.2d, #54
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    sshr v0.2d, v0.2d, #54
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: sext_v3i10_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-GI-NEXT:    sbfx x8, x0, #0, #10
 ; CHECK-GI-NEXT:    sbfx x9, x1, #0, #10
 ; CHECK-GI-NEXT:    sbfx x10, x2, #0, #10

diff  --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
index 9ac579c0fa6a91..d4f3e80e96dd19 100644
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -1194,6 +1194,7 @@ define i32 @t(i64 %x) {
 ; CHECK-LABEL: t:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ubfx x0, x0, #17, #28
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %s = lshr i64 %x, 13
   %t = trunc i64 %s to i32

diff  --git a/llvm/test/CodeGen/AArch64/shift-by-signext.ll b/llvm/test/CodeGen/AArch64/shift-by-signext.ll
index 3bcaee523f2e97..67e2da96084efb 100644
--- a/llvm/test/CodeGen/AArch64/shift-by-signext.ll
+++ b/llvm/test/CodeGen/AArch64/shift-by-signext.ll
@@ -81,6 +81,7 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
 ; CHECK-LABEL: n6_fshl:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr w8, w1, #1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    mvn w9, w2
 ; CHECK-NEXT:    lsl w10, w0, w2
 ; CHECK-NEXT:    lsr w8, w8, w9
@@ -94,6 +95,7 @@ define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
 ; CHECK-LABEL: n7_fshr:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsl w8, w0, #1
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    mvn w9, w2
 ; CHECK-NEXT:    lsr w10, w1, w2
 ; CHECK-NEXT:    lsl w8, w8, w9

diff  --git a/llvm/test/CodeGen/AArch64/shift-mod.ll b/llvm/test/CodeGen/AArch64/shift-mod.ll
index 4f41edb8348b2f..a90603195cf348 100644
--- a/llvm/test/CodeGen/AArch64/shift-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-mod.ll
@@ -40,6 +40,7 @@ define i64 @test3(i64 %x, i64 %y) {
 define i64 @test4(i64 %y, i32 %s) {
 ; CHECK-LABEL: test4:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    asr x0, x0, x1
 ; CHECK-NEXT:    ret
 entry:
@@ -51,6 +52,7 @@ entry:
 define i64 @test5(i64 %y, i32 %s) {
 ; CHECK-LABEL: test5:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    asr x0, x0, x1
 ; CHECK-NEXT:    ret
 entry:
@@ -62,6 +64,7 @@ entry:
 define i64 @test6(i64 %y, i32 %s) {
 ; CHECK-LABEL: test6:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    lsl x0, x0, x1
 ; CHECK-NEXT:    ret
 entry:
@@ -124,7 +127,7 @@ define i64 @ashr_add_shl_i36(i64 %r) {
 define i64 @ashr_add_shl_mismatch_shifts1(i64 %r) {
 ; CHECK-LABEL: ashr_add_shl_mismatch_shifts1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4294967296 // =0x100000000
+; CHECK-NEXT:    mov x8, #4294967296
 ; CHECK-NEXT:    add x8, x8, x0, lsl #8
 ; CHECK-NEXT:    asr x0, x8, #32
 ; CHECK-NEXT:    ret
@@ -137,7 +140,7 @@ define i64 @ashr_add_shl_mismatch_shifts1(i64 %r) {
 define i64 @ashr_add_shl_mismatch_shifts2(i64 %r) {
 ; CHECK-LABEL: ashr_add_shl_mismatch_shifts2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4294967296 // =0x100000000
+; CHECK-NEXT:    mov x8, #4294967296
 ; CHECK-NEXT:    add x8, x8, x0, lsr #8
 ; CHECK-NEXT:    lsr x0, x8, #8
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll
index 9b6bc09f571c37..066928687cc02d 100644
--- a/llvm/test/CodeGen/AArch64/shift.ll
+++ b/llvm/test/CodeGen/AArch64/shift.ll
@@ -21,6 +21,7 @@ define i1 @shl_i1(i1 %0, i1 %1){
 define i8 @shl_i8(i8 %0, i8 %1){
 ; CHECK-SD-LABEL: shl_i8:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsl w0, w0, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -36,6 +37,7 @@ define i8 @shl_i8(i8 %0, i8 %1){
 define i16 @shl_i16(i16 %0, i16 %1){
 ; CHECK-SD-LABEL: shl_i16:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsl w0, w0, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -121,6 +123,7 @@ define i8 @ashr_i8(i8 %0, i8 %1){
 ; CHECK-SD-LABEL: ashr_i8:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sxtb w8, w0
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    asr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -138,6 +141,7 @@ define i16 @ashr_i16(i16 %0, i16 %1){
 ; CHECK-SD-LABEL: ashr_i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sxth w8, w0
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    asr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -226,6 +230,7 @@ define i8 @lshr_i8(i8 %0, i8 %1){
 ; CHECK-SD-LABEL: lshr_i8:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    and w8, w0, #0xff
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -243,6 +248,7 @@ define i16 @lshr_i16(i16 %0, i16 %1){
 ; CHECK-SD-LABEL: lshr_i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    and w8, w0, #0xffff
+; CHECK-SD-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-NEXT:    lsr w0, w8, w1
 ; CHECK-SD-NEXT:    ret
 ;
@@ -529,6 +535,7 @@ define <4 x i8> @shl_v4i8(<4 x i8> %0, <4 x i8> %1){
 ; CHECK-GI-NEXT:    uzp1 v1.8b, v1.8b, v0.8b
 ; CHECK-GI-NEXT:    ushl v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = shl <4 x i8> %0, %1
     ret <4 x i8> %3
@@ -564,6 +571,7 @@ define <2 x i16> @shl_v2i16(<2 x i16> %0, <2 x i16> %1){
 ; CHECK-GI-NEXT:    uzp1 v1.4h, v1.4h, v0.4h
 ; CHECK-GI-NEXT:    ushl v0.4h, v0.4h, v1.4h
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = shl <2 x i16> %0, %1
     ret <2 x i16> %3
@@ -597,6 +605,7 @@ define <1 x i32> @shl_v1i32(<1 x i32> %0, <1 x i32> %1){
 ; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    lsl w8, w8, w9
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = shl <1 x i32> %0, %1
     ret <1 x i32> %3
@@ -706,6 +715,7 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %0, <4 x i8> %1){
 ; CHECK-GI-NEXT:    neg v1.8b, v1.8b
 ; CHECK-GI-NEXT:    sshl v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = ashr <4 x i8> %0, %1
     ret <4 x i8> %3
@@ -741,6 +751,7 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %0, <2 x i16> %1){
 ; CHECK-GI-NEXT:    neg v1.4h, v1.4h
 ; CHECK-GI-NEXT:    sshl v0.4h, v0.4h, v1.4h
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = ashr <2 x i16> %0, %1
     ret <2 x i16> %3
@@ -771,6 +782,7 @@ define <1 x i32> @ashr_v1i32(<1 x i32> %0, <1 x i32> %1){
 ; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    asr w8, w8, w9
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = ashr <1 x i32> %0, %1
     ret <1 x i32> %3
@@ -877,6 +889,7 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %0, <4 x i8> %1){
 ; CHECK-GI-NEXT:    neg v1.8b, v1.8b
 ; CHECK-GI-NEXT:    ushl v0.8b, v0.8b, v1.8b
 ; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = lshr <4 x i8> %0, %1
     ret <4 x i8> %3
@@ -911,6 +924,7 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %0, <2 x i16> %1){
 ; CHECK-GI-NEXT:    neg v1.4h, v1.4h
 ; CHECK-GI-NEXT:    ushl v0.4h, v0.4h, v1.4h
 ; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = lshr <2 x i16> %0, %1
     ret <2 x i16> %3
@@ -941,6 +955,7 @@ define <1 x i32> @lshr_v1i32(<1 x i32> %0, <1 x i32> %1){
 ; CHECK-GI-NEXT:    fmov w9, s1
 ; CHECK-GI-NEXT:    lsr w8, w8, w9
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %3 = lshr <1 x i32> %0, %1
     ret <1 x i32> %3

diff --git a/llvm/test/CodeGen/AArch64/shift_minsize.ll b/llvm/test/CodeGen/AArch64/shift_minsize.ll
index 5e06b2d2aa87cb..235fee718f539d 100644
--- a/llvm/test/CodeGen/AArch64/shift_minsize.ll
+++ b/llvm/test/CodeGen/AArch64/shift_minsize.ll
@@ -34,16 +34,19 @@ define i32 @f1(i64 %x, i64 %y) minsize optsize {
 ; CHECK-LABEL: f1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsl x0, x0, x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-WIN-LABEL: f1:
 ; CHECK-WIN:       // %bb.0:
 ; CHECK-WIN-NEXT:    lsl x0, x0, x1
+; CHECK-WIN-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-WIN-NEXT:    ret
 ;
 ; CHECK-DARWIN-LABEL: f1:
 ; CHECK-DARWIN:       ; %bb.0:
 ; CHECK-DARWIN-NEXT:    lsl x0, x0, x1
+; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-DARWIN-NEXT:    ret
 	%a = shl i64 %x, %y
 	%b = trunc i64 %a to i32
@@ -54,16 +57,19 @@ define i32 @f2(i64 %x, i64 %y) minsize optsize {
 ; CHECK-LABEL: f2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    asr x0, x0, x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-WIN-LABEL: f2:
 ; CHECK-WIN:       // %bb.0:
 ; CHECK-WIN-NEXT:    asr x0, x0, x1
+; CHECK-WIN-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-WIN-NEXT:    ret
 ;
 ; CHECK-DARWIN-LABEL: f2:
 ; CHECK-DARWIN:       ; %bb.0:
 ; CHECK-DARWIN-NEXT:    asr x0, x0, x1
+; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-DARWIN-NEXT:    ret
 	%a = ashr i64 %x, %y
 	%b = trunc i64 %a to i32
@@ -74,16 +80,19 @@ define i32 @f3(i64 %x, i64 %y) minsize optsize {
 ; CHECK-LABEL: f3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsr x0, x0, x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-WIN-LABEL: f3:
 ; CHECK-WIN:       // %bb.0:
 ; CHECK-WIN-NEXT:    lsr x0, x0, x1
+; CHECK-WIN-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-WIN-NEXT:    ret
 ;
 ; CHECK-DARWIN-LABEL: f3:
 ; CHECK-DARWIN:       ; %bb.0:
 ; CHECK-DARWIN-NEXT:    lsr x0, x0, x1
+; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-DARWIN-NEXT:    ret
 	%a = lshr i64 %x, %y
 	%b = trunc i64 %a to i32

diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
index 2c9dc42dc0767f..fb571eff39fe50 100644
--- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
+++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
@@ -21,8 +21,12 @@
 define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v4i8_16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-NEXT:    ret
   %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -43,8 +47,12 @@ define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i
 define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v4i8_8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    ldr d4, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
 ; CHECK-NEXT:    ret
   %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -93,8 +101,12 @@ define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8>
 define <16 x i8> @shuffle4_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-NEXT:    adrp x8, .LCPI2_0
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    mov v2.d[1], v3.d[0]
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI2_0]
 ; CHECK-NEXT:    adrp x8, .LCPI2_1
@@ -202,8 +214,10 @@ define <8 x i16> @shuffle4_v8i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x
 ; CHECK-LABEL: shuffle4_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d5, d2
-; CHECK-NEXT:    fmov d4, d0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-NEXT:    adrp x8, .LCPI4_0
+; CHECK-NEXT:    fmov d4, d0
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
 ; CHECK-NEXT:    mov v4.d[1], v1.d[0]
 ; CHECK-NEXT:    mov v5.d[1], v3.d[0]
@@ -261,15 +275,17 @@ define <4 x i32> @shuffle4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
 define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v8i8_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    adrp x8, .LCPI6_0
 ; CHECK-NEXT:    mov v2.d[1], v2.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v0.d[0]
-; CHECK-NEXT:    adrp x8, .LCPI6_0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI6_0]
 ; CHECK-NEXT:    adrp x8, .LCPI6_1
-; CHECK-NEXT:    tbl v2.8b, { v2.16b }, v1.8b
-; CHECK-NEXT:    tbl v1.8b, { v0.16b }, v1.8b
+; CHECK-NEXT:    tbl v3.8b, { v2.16b }, v1.8b
+; CHECK-NEXT:    tbl v2.8b, { v0.16b }, v1.8b
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI6_1]
-; CHECK-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
+; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-NEXT:    ret
   %x = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
   %y = shufflevector <8 x i8> %c, <8 x i8> %d, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
@@ -298,9 +314,11 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8
 define <8 x i8> @shuffle4_v8i8_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v8i8_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    adrp x8, .LCPI7_0
 ; CHECK-NEXT:    mov v2.d[1], v2.d[0]
 ; CHECK-NEXT:    mov v0.d[1], v0.d[0]
-; CHECK-NEXT:    adrp x8, .LCPI7_0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI7_0]
 ; CHECK-NEXT:    adrp x8, .LCPI7_1
 ; CHECK-NEXT:    tbl v2.8b, { v2.16b }, v1.8b
@@ -336,8 +354,10 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x
 ; CHECK-LABEL: shuffle4_v4i8_zext:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d5, d2
-; CHECK-NEXT:    fmov d4, d0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
 ; CHECK-NEXT:    adrp x8, .LCPI8_0
+; CHECK-NEXT:    fmov d4, d0
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI8_0]
 ; CHECK-NEXT:    mov v4.d[1], v1.d[0]
 ; CHECK-NEXT:    mov v5.d[1], v3.d[0]
@@ -373,8 +393,12 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x
 define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %ce, <4 x i16> %de) {
 ; CHECK-LABEL: shuffle4_v4i16_trunc:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    adrp x8, .LCPI9_0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI9_0]
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-NEXT:    ret
   %a = trunc <4 x i16> %ae to <4 x i8>
@@ -408,13 +432,13 @@ define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %
 define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %ce, <4 x i32> %de) {
 ; CHECK-LABEL: shuffle4_v4i32_trunc:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    xtn v0.4h, v0.4s
-; CHECK-NEXT:    xtn v1.4h, v1.4s
+; CHECK-NEXT:    xtn v4.4h, v0.4s
 ; CHECK-NEXT:    adrp x8, .LCPI10_0
-; CHECK-NEXT:    xtn v2.4h, v2.4s
-; CHECK-NEXT:    xtn v3.4h, v3.4s
-; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI10_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
+; CHECK-NEXT:    xtn v5.4h, v1.4s
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI10_0]
+; CHECK-NEXT:    xtn v6.4h, v2.4s
+; CHECK-NEXT:    xtn v7.4h, v3.4s
+; CHECK-NEXT:    tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b
 ; CHECK-NEXT:    ret
   %a = trunc <4 x i32> %ae to <4 x i8>
   %b = trunc <4 x i32> %be to <4 x i8>
@@ -446,8 +470,11 @@ define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %
 define <12 x i8> @shuffle3_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) {
 ; CHECK-LABEL: shuffle3_v4i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
 ; CHECK-NEXT:    ret
   %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -477,8 +504,9 @@ define <8 x i16> @shuffle3_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) {
 ; CHECK-LABEL: shuffle3_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d3, d2
-; CHECK-NEXT:    fmov d2, d0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    adrp x8, .LCPI12_0
+; CHECK-NEXT:    fmov d2, d0
 ; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI12_0]
 ; CHECK-NEXT:    mov v2.d[1], v1.d[0]
 ; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
@@ -534,11 +562,13 @@ define <4 x i32> @shuffle3_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) {
 ; CHECK-LABEL: insert4_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v4.16b, v3.16b
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
-; CHECK-NEXT:    mov v3.16b, v1.16b
 ; CHECK-NEXT:    adrp x9, .LCPI14_1
+; CHECK-NEXT:    mov v0.d[1], v2.d[0]
+; CHECK-NEXT:    mov v3.16b, v1.16b
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI14_0]
 ; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI14_1]
 ; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
@@ -602,13 +632,16 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8>
 define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) {
 ; CHECK-LABEL: insert4_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov v4.16b, v3.16b
 ; CHECK-NEXT:    adrp x8, .LCPI15_0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q31_q0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    mov v3.16b, v1.16b
+; CHECK-NEXT:    ldr q5, [x8, :lo12:.LCPI15_0]
 ; CHECK-NEXT:    mov v0.d[1], v2.d[0]
-; CHECK-NEXT:    mov v2.16b, v1.16b
-; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI15_0]
 ; CHECK-NEXT:    adrp x8, .LCPI15_1
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI15_1]
-; CHECK-NEXT:    tbl v31.16b, { v2.16b, v3.16b }, v4.16b
+; CHECK-NEXT:    tbl v31.16b, { v3.16b, v4.16b }, v5.16b
 ; CHECK-NEXT:    tbl v0.16b, { v31.16b, v0.16b }, v1.16b
 ; CHECK-NEXT:    ret
   %e1 = extractelement <8 x i8> %a, i32 4
@@ -672,7 +705,6 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2
 ; CHECK-NEXT:    adrp x8, .LCPI16_0
 ; CHECK-NEXT:    frintm v1.2d, v1.2d
 ; CHECK-NEXT:    frintm v5.2d, v5.2d
-; CHECK-NEXT:    ldr q16, [x8, :lo12:.LCPI16_0]
 ; CHECK-NEXT:    frintm v2.2d, v2.2d
 ; CHECK-NEXT:    frintm v6.2d, v6.2d
 ; CHECK-NEXT:    frintm v3.2d, v3.2d
@@ -685,16 +717,17 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2
 ; CHECK-NEXT:    fcvtzs v6.2d, v6.2d
 ; CHECK-NEXT:    fcvtzs v3.2d, v3.2d
 ; CHECK-NEXT:    fcvtzs v7.2d, v7.2d
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    xtn v4.2s, v4.2d
-; CHECK-NEXT:    xtn v1.2s, v1.2d
-; CHECK-NEXT:    xtn v5.2s, v5.2d
-; CHECK-NEXT:    xtn v2.2s, v2.2d
-; CHECK-NEXT:    xtn v6.2s, v6.2d
-; CHECK-NEXT:    xtn v3.2s, v3.2d
-; CHECK-NEXT:    xtn v7.2s, v7.2d
-; CHECK-NEXT:    tbl v1.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
-; CHECK-NEXT:    tbl v2.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
+; CHECK-NEXT:    xtn v16.2s, v0.2d
+; CHECK-NEXT:    xtn v20.2s, v4.2d
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT:    xtn v17.2s, v1.2d
+; CHECK-NEXT:    xtn v21.2s, v5.2d
+; CHECK-NEXT:    xtn v18.2s, v2.2d
+; CHECK-NEXT:    xtn v22.2s, v6.2d
+; CHECK-NEXT:    xtn v19.2s, v3.2d
+; CHECK-NEXT:    xtn v23.2s, v7.2d
+; CHECK-NEXT:    tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
+; CHECK-NEXT:    tbl v2.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b
 ; CHECK-NEXT:    uzp1 v0.8h, v1.8h, v2.8h
 ; CHECK-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll
index 18f56d2fec7916..41dd7f06712d24 100644
--- a/llvm/test/CodeGen/AArch64/shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/shuffles.ll
@@ -316,6 +316,8 @@ define <4 x i32> @test_shuf6(<4 x i32> %a, <4 x i32> %b)
 define <4 x i16> @test_shuf7(<4 x i16> %a, <4 x i16> %b)
 ; CHECKLE-LABEL: test_shuf7:
 ; CHECKLE:       // %bb.0:
+; CHECKLE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECKLE-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECKLE-NEXT:    mov v0.h[2], v1.h[3]
 ; CHECKLE-NEXT:    trn1 v0.4h, v0.4h, v0.4h
 ; CHECKLE-NEXT:    ret
@@ -336,8 +338,10 @@ define <4 x i16> @test_shuf7(<4 x i16> %a, <4 x i16> %b)
 define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b)
 ; CHECKLE-LABEL: test_shuf8:
 ; CHECKLE:       // %bb.0:
-; CHECKLE-NEXT:    mov v0.d[1], v1.d[0]
+; CHECKLE-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECKLE-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECKLE-NEXT:    adrp x8, .LCPI12_0
+; CHECKLE-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECKLE-NEXT:    ldr d1, [x8, :lo12:.LCPI12_0]
 ; CHECKLE-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECKLE-NEXT:    ret
@@ -362,7 +366,9 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
 ; CHECKLE-LABEL: test_shuf9:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI13_0
+; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -372,10 +378,10 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI13_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI13_0
-; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret
@@ -412,7 +418,9 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
 ; CHECKLE-LABEL: test_shuf11:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI15_0
+; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
+; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -422,10 +430,10 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI15_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI15_0
-; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret
@@ -438,7 +446,9 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
 ; CHECKLE-LABEL: test_shuf12:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI16_0
+; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
+; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -448,10 +458,10 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI16_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI16_0
-; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret
@@ -464,7 +474,9 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
 ; CHECKLE-LABEL: test_shuf13:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI17_0
+; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
+; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -474,10 +486,10 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI17_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI17_0
-; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret
@@ -490,7 +502,9 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
 ; CHECKLE-LABEL: test_shuf14:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI18_0
+; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
+; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -500,10 +514,10 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI18_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI18_0
-; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret
@@ -516,7 +530,9 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
 ; CHECKLE-LABEL: test_shuf15:
 ; CHECKLE:       // %bb.0:
 ; CHECKLE-NEXT:    adrp x8, .LCPI19_0
+; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI19_0]
+; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECKLE-NEXT:    ret
 ;
@@ -526,10 +542,10 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    adrp x8, .LCPI19_0
 ; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI19_0
-; CHECKBE-NEXT:    ld1 { v2.16b }, [x8]
-; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
-; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECKBE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
+; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
+; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
 ; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
 ; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; CHECKBE-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll
index d33e36c487e45b..0f5b240e387ed0 100644
--- a/llvm/test/CodeGen/AArch64/shufflevector.ll
+++ b/llvm/test/CodeGen/AArch64/shufflevector.ll
@@ -7,30 +7,46 @@
 define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-SD-LABEL: shufflevector_v8i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    adrp x8, .LCPI0_0
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v8i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    adrp x8, .LCPI0_0
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
     ret <8 x i8> %c
 }
 
 define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: shufflevector_v16i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI1_0
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: shufflevector_v16i8:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    adrp x8, .LCPI1_0
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: shufflevector_v16i8:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI1_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI1_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ret
     %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15, i32 2, i32 4, i32 6, i32 8, i32 25, i32 30, i32 31, i32 31>
     ret <16 x i8> %c
 }
@@ -45,12 +61,23 @@ define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) {
 }
 
 define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: shufflevector_v8i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI3_0
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: shufflevector_v8i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    adrp x8, .LCPI3_0
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: shufflevector_v8i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI3_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ret
     %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
     ret <8 x i16> %c
 }
@@ -96,6 +123,7 @@ define <2 x ptr> @shufflevector_v2p0(<2 x ptr> %a, <2 x ptr> %b) {
 define <8 x i8> @shufflevector_v8i8_zeroes(<8 x i8> %a, <8 x i8> %b) {
 ; CHECK-LABEL: shufflevector_v8i8_zeroes:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-NEXT:    ret
     %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -114,6 +142,7 @@ define <16 x i8> @shufflevector_v16i8_zeroes(<16 x i8> %a, <16 x i8> %b) {
 define <4 x i16> @shufflevector_v4i16_zeroes(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: shufflevector_v4i16_zeroes:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
     %c = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
@@ -132,6 +161,7 @@ define <8 x i16> @shufflevector_v8i16_zeroes(<8 x i16> %a, <8 x i16> %b) {
 define <2 x i32> @shufflevector_v2i32_zeroes(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: shufflevector_v2i32_zeroes:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
     %c = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 0>
@@ -170,12 +200,17 @@ define <2 x ptr> @shufflevector_v2p0_zeroes(<2 x ptr> %a, <2 x ptr> %b) {
 define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
 ; CHECK-SD-LABEL: shufflevector_v2i1:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.s[1], v1.s[1]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v2i1:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    mov w8, v1.s[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w9, v0.s[1]
 ; CHECK-GI-NEXT:    mov v1.b[1], w8
 ; CHECK-GI-NEXT:    mov v0.b[1], w9
@@ -185,6 +220,7 @@ define <2 x i1> @shufflevector_v2i1(<2 x i1> %a, <2 x i1> %b){
 ; CHECK-GI-NEXT:    umov w9, v0.b[1]
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w9
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 3>
     ret <2 x i1> %c
@@ -221,25 +257,26 @@ define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){
 define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){
 ; CHECK-SD-LABEL: shufflevector_v32i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov v1.16b, v2.16b
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 def $q1_q2
 ; CHECK-SD-NEXT:    adrp x8, .LCPI18_0
 ; CHECK-SD-NEXT:    adrp x9, .LCPI18_1
-; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
-; CHECK-SD-NEXT:    ldr q3, [x9, :lo12:.LCPI18_1]
-; CHECK-SD-NEXT:    tbl v2.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v0.16b, v1.16b }, v3.16b
-; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI18_0]
+; CHECK-SD-NEXT:    ldr q4, [x9, :lo12:.LCPI18_1]
+; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v3.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v4.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v32i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    mov v3.16b, v0.16b
 ; CHECK-GI-NEXT:    adrp x8, .LCPI18_1
 ; CHECK-GI-NEXT:    adrp x9, .LCPI18_0
+; CHECK-GI-NEXT:    mov v4.16b, v2.16b
 ; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI18_1]
-; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI18_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
-; CHECK-GI-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v3.16b
+; CHECK-GI-NEXT:    ldr q1, [x9, :lo12:.LCPI18_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v3.16b, v4.16b }, v0.16b
+; CHECK-GI-NEXT:    tbl v1.16b, { v3.16b, v4.16b }, v1.16b
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
     ret <32 x i8> %c
@@ -277,25 +314,26 @@ define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){
 define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){
 ; CHECK-SD-LABEL: shufflevector_v16i16:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov v1.16b, v2.16b
+; CHECK-SD-NEXT:    // kill: def $q2 killed $q2 def $q1_q2
 ; CHECK-SD-NEXT:    adrp x8, .LCPI20_0
 ; CHECK-SD-NEXT:    adrp x9, .LCPI20_1
-; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI20_0]
-; CHECK-SD-NEXT:    ldr q3, [x9, :lo12:.LCPI20_1]
-; CHECK-SD-NEXT:    tbl v2.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-SD-NEXT:    tbl v1.16b, { v0.16b, v1.16b }, v3.16b
-; CHECK-SD-NEXT:    mov v0.16b, v2.16b
+; CHECK-SD-NEXT:    mov v1.16b, v0.16b
+; CHECK-SD-NEXT:    ldr q3, [x8, :lo12:.LCPI20_0]
+; CHECK-SD-NEXT:    ldr q4, [x9, :lo12:.LCPI20_1]
+; CHECK-SD-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v3.16b
+; CHECK-SD-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v4.16b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v16i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    mov v3.16b, v0.16b
 ; CHECK-GI-NEXT:    adrp x8, .LCPI20_1
 ; CHECK-GI-NEXT:    adrp x9, .LCPI20_0
+; CHECK-GI-NEXT:    mov v4.16b, v2.16b
 ; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI20_1]
-; CHECK-GI-NEXT:    ldr q3, [x9, :lo12:.LCPI20_0]
-; CHECK-GI-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
-; CHECK-GI-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v3.16b
+; CHECK-GI-NEXT:    ldr q1, [x9, :lo12:.LCPI20_0]
+; CHECK-GI-NEXT:    tbl v0.16b, { v3.16b, v4.16b }, v0.16b
+; CHECK-GI-NEXT:    tbl v1.16b, { v3.16b, v4.16b }, v1.16b
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
     ret <16 x i16> %c
@@ -322,8 +360,10 @@ define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-GI-LABEL: shufflevector_v8i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI22_0
+; CHECK-GI-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
 ; CHECK-GI-NEXT:    ldr q4, [x8, :lo12:.LCPI22_0]
+; CHECK-GI-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
 ; CHECK-GI-NEXT:    tbl v1.16b, { v2.16b, v3.16b }, v4.16b
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12, i32 15>
@@ -399,11 +439,13 @@ define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) {
 define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
 ; CHECK-SD-LABEL: shufflevector_v2i1_zeroes:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v2i1_zeroes:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov w8, v0.s[1]
 ; CHECK-GI-NEXT:    mov v0.b[1], w8
 ; CHECK-GI-NEXT:    dup v0.8b, v0.b[0]
@@ -411,6 +453,7 @@ define <2 x i1> @shufflevector_v2i1_zeroes(<2 x i1> %a, <2 x i1> %b){
 ; CHECK-GI-NEXT:    umov w9, v0.b[1]
 ; CHECK-GI-NEXT:    mov v0.s[0], w8
 ; CHECK-GI-NEXT:    mov v0.s[1], w9
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <2 x i1> %a, <2 x i1> %b, <2 x i32> <i32 0, i32 0>
     ret <2 x i1> %c
@@ -421,6 +464,7 @@ define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-SD-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
 ; CHECK-SD-NEXT:    fmov w0, s0
@@ -453,6 +497,7 @@ define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    sub sp, sp, #16
 ; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    dup v1.2s, v0.s[0]
 ; CHECK-SD-NEXT:    fmov w9, s0
 ; CHECK-SD-NEXT:    strh w9, [sp, #12]
@@ -554,18 +599,23 @@ define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) {
 define <7 x i8> @shufflevector_v7i8(<7 x i8> %a, <7 x i8> %b) {
 ; CHECK-SD-LABEL: shufflevector_v7i8:
 ; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    adrp x8, .LCPI36_0
+; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
 ; CHECK-SD-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: shufflevector_v7i8:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    adrp x8, .LCPI36_0
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI36_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
     ret <7 x i8> %c
@@ -580,22 +630,36 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) {
 ;
 ; CHECK-GI-LABEL: shufflevector_v3i16:
 ; CHECK-GI:       // %bb.0:
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-GI-NEXT:    adrp x8, .LCPI37_0
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-GI-NEXT:    ldr d1, [x8, :lo12:.LCPI37_0]
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 1, i32 2, i32 4>
     ret <3 x i16> %c
 }
 
 define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) {
-; CHECK-LABEL: shufflevector_v7i16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI38_0
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
-; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: shufflevector_v7i16:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    adrp x8, .LCPI38_0
+; CHECK-SD-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
+; CHECK-SD-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-SD-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: shufflevector_v7i16:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI38_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI38_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT:    ret
     %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> <i32 1, i32 3, i32 5, i32 7, i32 8, i32 10, i32 12>
     ret <7 x i16> %c
 }
@@ -610,7 +674,9 @@ define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) {
 ; CHECK-GI-LABEL: shufflevector_v3i32:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    adrp x8, .LCPI39_0
+; CHECK-GI-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    ldr q2, [x8, :lo12:.LCPI39_0]
+; CHECK-GI-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-GI-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-GI-NEXT:    ret
     %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> <i32 1, i32 2, i32 4>
@@ -643,6 +709,7 @@ define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) {
 define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) {
 ; CHECK-LABEL: shufflevector_v7i8_zeroes:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.8b, v0.b[0]
 ; CHECK-NEXT:    ret
     %c = shufflevector <7 x i8> %a, <7 x i8> %b, <7 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -652,6 +719,7 @@ define <7 x i8> @shufflevector_v7i8_zeroes(<7 x i8> %a, <7 x i8> %b) {
 define <3 x i16> @shufflevector_v3i16_zeroes(<3 x i16> %a, <3 x i16> %b) {
 ; CHECK-LABEL: shufflevector_v3i16_zeroes:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
     %c = shufflevector <3 x i16> %a, <3 x i16> %b, <3 x i32> <i32 0, i32 0, i32 0>

diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
index 4d383fefc43c7b..f65a08ae2acea0 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
@@ -298,6 +298,7 @@ exit:
 define i32 @f6(i1 %c, ptr %a, i32 %i) {
 ; CHECK-LABEL: f6:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    tbz w0, #0, .LBB6_2
 ; CHECK-NEXT:  // %bb.1: // %if.then
 ; CHECK-NEXT:    mov w0, wzr

diff --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
index 1341fa783d06b9..aee705f0be9b9b 100644
--- a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
+++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
@@ -133,6 +133,7 @@ define target("aarch64.svcount") @test_sel(target("aarch64.svcount") %x, target(
 ;
 ; CHECK-O3-LABEL: test_sel:
 ; CHECK-O3:       // %bb.0:
+; CHECK-O3-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-O3-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-O3-NEXT:    whilelo p2.b, xzr, x8
 ; CHECK-O3-NEXT:    sel p0.b, p2, p0.b, p1.b

diff --git a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
index 1fd616938822e8..8e3866fcec89a7 100644
--- a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
+++ b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll
@@ -28,6 +28,7 @@ define void @dont_coalesce_args(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind
   ; CHECK-REGALLOC-NEXT:   STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
   ; CHECK-REGALLOC-NEXT:   MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
   ; CHECK-REGALLOC-NEXT:   renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+  ; CHECK-REGALLOC-NEXT:   renamable $q0 = KILL killed renamable $q0, implicit-def $z0
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-REGALLOC-NEXT:   BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
@@ -58,6 +59,7 @@ define <2 x i64> @dont_coalesce_res() "aarch64_pstate_sm_body" nounwind {
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-REGALLOC-NEXT:   BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
+  ; CHECK-REGALLOC-NEXT:   renamable $q0 = KILL renamable $q0, implicit killed $z0
   ; CHECK-REGALLOC-NEXT:   STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0)
   ; CHECK-REGALLOC-NEXT:   MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg
   ; CHECK-REGALLOC-NEXT:   $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
@@ -93,6 +95,7 @@ define <2 x i64> @dont_coalesce_arg_that_is_also_res(<2 x i64> %a) "aarch64_psta
   ; CHECK-REGALLOC-NEXT:   STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0)
   ; CHECK-REGALLOC-NEXT:   MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
   ; CHECK-REGALLOC-NEXT:   renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+  ; CHECK-REGALLOC-NEXT:   renamable $q0 = KILL killed renamable $q0, implicit-def $z0
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
   ; CHECK-REGALLOC-NEXT:   BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp
   ; CHECK-REGALLOC-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp

diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
index d40618f2678b69..57f8e5438eaf2b 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
@@ -299,6 +299,7 @@ define void @ldr_with_off_16mulvl(ptr %ptr) {
 define void @ldr_with_off_var(ptr %base, i32 %off) {
 ; CHECK-LABEL: ldr_with_off_var:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    rdsvl x9, #1
 ; CHECK-NEXT:    add w12, w1, #16

diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
index 03c1f28fbaa184..1ff32aade4a1f9 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
@@ -299,6 +299,7 @@ define void @str_with_off_16mulvl(ptr %ptr) {
 define void @str_with_off_var(ptr %base, i32 %off) {
 ; CHECK-LABEL: str_with_off_var:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    sxtw x8, w1
 ; CHECK-NEXT:    rdsvl x9, #1
 ; CHECK-NEXT:    add w12, w1, #16

diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
index f4ef07bcea7132..500c51159dd91f 100644
--- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
+++ b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll
@@ -160,9 +160,11 @@ define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
@@ -199,9 +201,11 @@ define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
@@ -238,9 +242,11 @@ define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -282,9 +288,11 @@ define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -322,9 +330,11 @@ define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -362,9 +372,11 @@ define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -402,9 +414,11 @@ define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -442,9 +456,11 @@ define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
@@ -482,9 +498,11 @@ define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -522,9 +540,11 @@ define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
@@ -566,9 +586,11 @@ define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -605,9 +627,11 @@ define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -644,9 +668,11 @@ define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -683,9 +709,11 @@ define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -722,9 +750,11 @@ define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -761,9 +791,11 @@ define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -800,9 +832,11 @@ define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -839,9 +873,11 @@ define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
@@ -881,10 +917,12 @@ define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
 ; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.d, z0.d
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    mov x19, x0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    and z1.b, z1.b, #0x1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
 ; CHECK-NEXT:    str p0, [x8, #7, mul vl] // 2-byte Folded Spill
@@ -1051,8 +1089,9 @@ define void @dont_coalesce_res_f16(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_f16
 ; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1290,8 +1329,9 @@ define void @dont_coalesce_res_v1f16(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v1f16
 ; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1396,8 +1436,9 @@ define void @dont_coalesce_res_v16i8(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v16i8
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1b { z0.b }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1429,8 +1470,9 @@ define void @dont_coalesce_res_v8i16(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v8i16
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1462,8 +1504,9 @@ define void @dont_coalesce_res_v4i32(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v4i32
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1495,8 +1538,9 @@ define void @dont_coalesce_res_v2i64(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v2i64
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1528,8 +1572,9 @@ define void @dont_coalesce_res_v8f16(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v8f16
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1561,8 +1606,9 @@ define void @dont_coalesce_res_v4f32(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v4f32
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
@@ -1594,8 +1640,9 @@ define void @dont_coalesce_res_v2f64(ptr %ptr) #0 {
 ; CHECK-NEXT:    bl get_v2f64
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstart sm
-; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
 ; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload

diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
index 05b62d2b6abcbf..572b1fff3520a9 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll
@@ -112,8 +112,10 @@ define <2 x i64> @locally_streaming_caller_no_callee(<2 x i64> %a) "aarch64_psta
 ; CHECK-NEXT:    smstart sm
 ; CHECK-NEXT:    index z0.d, #0, #1
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
 ; CHECK-NEXT:    add z0.d, z0.d, #41 // =0x29
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    smstop sm
 ; CHECK-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload

diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
index 104b640f0f9804..58992eb2d592a6 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
@@ -139,10 +139,12 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    add x8, sp, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl __arm_sme_state
 ; CHECK-NEXT:    add x8, sp, #16
 ; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    and x19, x0, #0x1
 ; CHECK-NEXT:    tbz w19, #0, .LBB4_2
@@ -160,7 +162,9 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload

diff --git a/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll
index 2774394e46d6dd..38d3bed2eaf907 100644
--- a/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll
@@ -6,6 +6,8 @@
 define <vscale x 16 x i8> @fcvt_x2(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) {
 ; CHECK-LABEL: fcvt_x2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvt z0.b, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1)
@@ -15,6 +17,10 @@ define <vscale x 16 x i8> @fcvt_x2(<vscale x 8 x half> %zn0, <vscale x 8 x half>
 define <vscale x 16 x i8> @fcvt_x4(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
 ; CHECK-LABEL: fcvt_x4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fcvt z0.b, { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x4(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
@@ -25,6 +31,10 @@ define <vscale x 16 x i8> @fcvt_x4(<vscale x 4 x float> %zn0, <vscale x 4 x floa
 define <vscale x 16 x i8> @fcvtn(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
 ; CHECK-LABEL: fcvtn:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fcvtn z0.b, { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.x4(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
@@ -35,6 +45,8 @@ define <vscale x 16 x i8> @fcvtn(<vscale x 4 x float> %zn0, <vscale x 4 x float>
 define <vscale x 16 x i8> @bfcvt(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) {
 ; CHECK-LABEL: bfcvt:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfcvt z0.b, { z0.h, z1.h }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1)

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
index d436d647e6de0f..dbf47d980d26eb 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll
@@ -6,7 +6,9 @@ target triple = "aarch64-linux"
 define void @add_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) #0 {
 ; CHECK-LABEL: add_f16_vg1x2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fadd za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    fadd za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -19,7 +21,11 @@ define void @add_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @add_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
 ; CHECK-LABEL: add_f16_vg1x4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fadd za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    fadd za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -35,7 +41,9 @@ define void @add_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @sub_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) #1 {
 ; CHECK-LABEL: sub_f16_vg1x2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fsub za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    fsub za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -48,7 +56,11 @@ define void @sub_f16_vg1x2(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @sub_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1,
 ; CHECK-LABEL: sub_f16_vg1x4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fsub za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    fsub za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -64,7 +76,9 @@ define void @sub_f16_vg1x4(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x h
 define void @add_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) #2 {
 ; CHECK-LABEL: add_bf16_vg1x2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfadd za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    bfadd za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -77,7 +91,11 @@ define void @add_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
 define void @add_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
 ; CHECK-LABEL: add_bf16_vg1x4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfadd za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    bfadd za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -93,7 +111,9 @@ define void @add_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
 define void @sub_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) #2 {
 ; CHECK-LABEL: sub_bf16_vg1x2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfsub za.h[w8, 0, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    bfsub za.h[w8, 7, vgx2], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -106,7 +126,11 @@ define void @sub_bf16_vg1x2(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8
 define void @sub_bf16_vg1x4(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1,
 ; CHECK-LABEL: sub_bf16_vg1x4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfsub za.h[w8, 0, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    bfsub za.h[w8, 7, vgx4], { z0.h - z3.h }
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
index ea5f46481551d4..eee577ce48349f 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll
@@ -8,7 +8,9 @@
 define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,  <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    add za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -25,7 +27,9 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4
 define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,  <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    add za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -46,7 +50,11 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2
 define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    add za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -67,7 +75,11 @@ define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4
 define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
 ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    add za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -93,7 +105,11 @@ define void @multi_vector_add_write_single_za_vg1x4_i64(i32 %slice,
 define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    add za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -112,7 +128,11 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32>
 define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
 ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    add za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -135,7 +155,15 @@ define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64>
 define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    add za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -159,7 +187,15 @@ define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32>
 define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
 ; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    add za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -187,7 +223,9 @@ define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64>
 define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.s[w8, 0, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    add za.s[w8, 7, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -200,7 +238,9 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    add za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    add za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -213,7 +253,9 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x2_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fadd za.s[w8, 0, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    fadd za.s[w8, 7, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -228,7 +270,9 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x2_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fadd za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    fadd za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -245,7 +289,11 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn
 define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.s[w8, 0, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    add za.s[w8, 7, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
@@ -262,7 +310,11 @@ define void @multi_vector_add_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    add za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    add za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -279,7 +331,11 @@ define void @multi_vector_add_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x4_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fadd za.s[w8, 0, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    fadd za.s[w8, 7, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
@@ -296,7 +352,11 @@ define void @multi_vector_add_za_vg1x4_f32(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_add_za_vg1x4_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3) {
 ; CHECK-LABEL: multi_vector_add_za_vg1x4_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fadd za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    fadd za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
index f539ac3d0904d9..9b68f25bb0649c 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll
@@ -7,6 +7,8 @@
 define <vscale x 8 x half> @multi_vector_cvtn_x2_f16(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
 ; CHECK-LABEL: multi_vector_cvtn_x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fcvtn z0.h, { z0.s, z1.s }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtn.x2.nxv4f32(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)
@@ -20,6 +22,8 @@ define <vscale x 8 x half> @multi_vector_cvtn_x2_f16(<vscale x 4 x float> %zn1,
 define <vscale x 8 x bfloat> @multi_vector_bfcvtn_x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
 ; CHECK-LABEL: multi_vector_bfcvtn_x2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfcvtn z0.h, { z0.s, z1.s }
 ; CHECK-NEXT:    ret
   %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvtn.x2(<vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2)

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
index 985613ccdd866b..ecfbb47ba55719 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll
@@ -53,20 +53,20 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x2_
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }@multi_vec_max_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_max_multi_x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    famax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    famax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.famax.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1,  <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4)
   ret { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } %res
@@ -75,20 +75,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_max_multi_x4_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    famax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    famax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.famax.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1,  <vscale x 4 x float> %zm2,  <vscale x 4 x float> %zm3,  <vscale x 4 x float> %zm4)
   ret { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } %res
@@ -97,20 +97,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_max_multi_x4_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    famax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    famax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sme.famax.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
@@ -171,20 +171,20 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_main_multi_x2
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_min_multi_x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    famin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    famin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
               @llvm.aarch64.sme.famin.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -195,20 +195,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_min_multi_x4_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    famin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    famin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
               @llvm.aarch64.sme.famin.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -219,20 +219,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_min_multi_x4_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    famin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    famin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sme.famin.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
index a03af63b86a73e..006e8aa2475ba0 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll
@@ -6,7 +6,9 @@
 define void @multi_vector_add_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg1x2_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -23,7 +25,9 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_add_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg1x2_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -40,7 +44,11 @@ define void @multi_vector_add_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_add_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_add_single_vg1x4_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -60,7 +68,11 @@ define void @multi_vector_add_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_add_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_add_single_vg1x4_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -82,7 +94,9 @@ define void @multi_vector_add_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_sub_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg1x2_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -99,7 +113,9 @@ define void @multi_vector_sub_single_vg1x2_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_sub_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg1x2_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -116,7 +132,11 @@ define void @multi_vector_sub_single_vg1x2_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_sub_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_sub_single_vg1x4_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -136,7 +156,11 @@ define void @multi_vector_sub_single_vg1x4_s(i32 %slice, <vscale x 4 x float> %z
 define void @multi_vector_sub_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_sub_single_vg1x4_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -158,7 +182,11 @@ define void @multi_vector_sub_single_vg1x4_d(i32 %slice, <vscale x 2 x double> %
 define void @multi_vector_add_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
 ; CHECK-LABEL: multi_vector_add_vg1x2_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -176,7 +204,11 @@ define void @multi_vector_add_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_add_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
 ; CHECK-LABEL: multi_vector_add_vg1x2_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -212,7 +244,15 @@ define void @multi_vector_add_vg1x2_s_regclass(i32 %slice, <vscale x 4 x float>
 define void @multi_vector_add_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_add_vg1x4_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -230,7 +270,15 @@ define void @multi_vector_add_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_add_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_add_vg1x4_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -272,7 +320,11 @@ define void @multi_vector_add_vg1x4_s_regclass(i32 %slice, <vscale x 4 x float>
 define void @multi_vector_sub_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1,
 ; CHECK-LABEL: multi_vector_sub_vg1x2_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -290,7 +342,11 @@ define void @multi_vector_sub_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_sub_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1,
 ; CHECK-LABEL: multi_vector_sub_vg1x2_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -308,7 +364,15 @@ define void @multi_vector_sub_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <v
 define void @multi_vector_sub_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_sub_vg1x4_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -326,7 +390,15 @@ define void @multi_vector_sub_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vs
 define void @multi_vector_sub_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_sub_vg1x4_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -346,7 +418,9 @@ define void @multi_vector_sub_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <v
 define void @multi_vector_add_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    ret
@@ -363,7 +437,9 @@ define void @multi_vector_add_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_add_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg1x2_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    ret
@@ -382,8 +458,8 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, <vscale x 4 x fl
 ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s_regclass:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov z5.d, z0.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z5.d, z0.d
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx2], { z4.s, z5.s }, z2.s[3]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 %slice,
@@ -395,7 +471,11 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, <vscale x 4 x fl
 define void @multi_vector_add_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_add_lane_vg1x4_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    ret
@@ -415,7 +495,11 @@ define void @multi_vector_add_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_add_lane_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_add_lane_vg1x4_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    ret
@@ -437,8 +521,8 @@ define void @multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, <vscale x 4 x fl
 ; CHECK-LABEL: multi_vector_add_lane_vg1x4_s_regclass:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z26.d, z3.d
-; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    mov z27.d, z0.d
 ; CHECK-NEXT:    fmla za.s[w8, 0, vgx4], { z24.s - z27.s }, z4.s[3]
@@ -456,7 +540,9 @@ define void @multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, <vscale x 4 x fl
 define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3]
 ; CHECK-NEXT:    ret
@@ -473,7 +559,9 @@ define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1]
 ; CHECK-NEXT:    ret
@@ -490,7 +578,11 @@ define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, <vscale x 2 x double> %zn
 define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3,
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3]
 ; CHECK-NEXT:    ret
@@ -510,7 +602,11 @@ define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_sub_lane_vg1x4_d(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3,
 ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1]
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
index 46e2040715e8f1..ca149f41dbb835 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll
@@ -26,18 +26,18 @@ define void @fdot_multi_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <
 define void @fdot_multi_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
 ; CHECK-LABEL: fdot_multi_za32_f16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
                                         <vscale x 8 x half> %zn4, <vscale x 8 x half> %zn5, <vscale x 8 x half> %zn6, <vscale x 8 x half> %zn7) #0 {
   call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
@@ -71,18 +71,18 @@ define void @bfdot_multi_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
 define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
 ; CHECK-LABEL: fdot_multi_za32_bf16_vg1x4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    bfdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    bfdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
                                         <vscale x 8 x bfloat> %zn4, <vscale x 8 x bfloat> %zn5, <vscale x 8 x bfloat> %zn6, <vscale x 8 x bfloat> %zn7) #0 {
   call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
@@ -99,7 +99,9 @@ define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
 define void @fdot_single_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) #0 {
 ; CHECK-LABEL: fdot_single_za32_f16_vg1x2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    fdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    fdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -112,7 +114,11 @@ define void @fdot_single_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
 define void @fdot_single_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) #0 {
 ; CHECK-LABEL: fdot_single_za32_f16_vg1x4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    fdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -128,7 +134,9 @@ define void @fdot_single_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
 define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) #0 {
 ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    bfdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    bfdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -141,7 +149,11 @@ define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused
 define void @bfdot_single_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) #0 {
 ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x4:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    bfdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -158,8 +170,8 @@ define void @fdot_lane_za32_f16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused, <v
 ; CHECK-LABEL: fdot_lane_za32_f16_vg1x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    fdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3]
 ; CHECK-NEXT:    fdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3]
 ; CHECK-NEXT:    ret
@@ -173,8 +185,8 @@ define void @fdot_lane_za32_f16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused, <v
 ; CHECK-LABEL: fdot_lane_za32_f16_vg1x4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    fdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3]
@@ -195,8 +207,8 @@ define void @bfdot_lane_za32_bf16_vg1x2(i32 %slice, <vscale x 16 x i8> %unused,
 ; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    bfdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3]
 ; CHECK-NEXT:    bfdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3]
 ; CHECK-NEXT:    ret
@@ -210,8 +222,8 @@ define void @bfdot_lane_za32_bf16_vg1x4(i32 %slice, <vscale x 16 x i8> %unused,
 ; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    bfdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3]

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
index 7ef319694917ef..591fe8da6b79cd 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fscale.ll
@@ -6,6 +6,8 @@
 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x2_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x2_half:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fscale { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm)
@@ -15,6 +17,8 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x2_h
 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x2_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x2_float:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fscale { z0.s, z1.s }, { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm)
@@ -24,6 +28,8 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x2
 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_x2_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x2_double:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fscale { z0.d, z1.d }, { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm)
@@ -35,6 +41,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_single_x4_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x4_half:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fscale { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm)
@@ -44,6 +54,10 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_single_x4_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2,  <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x4_float:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fscale { z0.s - z3.s }, { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm)
@@ -53,6 +67,10 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_single_x4_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vec_scale_single_x4_double:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fscale { z0.d - z3.d }, { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.single.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm)
@@ -63,6 +81,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x2_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
 ; CHECK-LABEL: multi_vec_scale_x2_half:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fscale { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x2.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
@@ -72,6 +94,10 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x2_half( <v
 define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x2_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2 ) {
 ; CHECK-LABEL: multi_vec_scale_x2_float:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fscale { z0.s, z1.s }, { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x2.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
@@ -81,6 +107,10 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x2_float(
 define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x2_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
 ; CHECK-LABEL: multi_vec_scale_x2_double:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    fscale { z0.d, z1.d }, { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x2.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
@@ -91,6 +121,14 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x2_doub
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_scale_x4_half( <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
 ; CHECK-LABEL: multi_vec_scale_x4_half:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fscale { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sme.fp8.scale.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
@@ -100,6 +138,14 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_scale_x4_float( <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2,  <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
 ; CHECK-LABEL: multi_vec_scale_x4_float:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fscale { z0.s - z3.s }, { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sme.fp8.scale.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x i32> %zm1,  <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
@@ -109,6 +155,14 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_scale_x4_double( <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
 ; CHECK-LABEL: multi_vec_scale_x4_double:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    fscale { z0.d - z3.d }, { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sme.fp8.scale.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
index 3e6f8be057721f..c7a2ec16e3f7f6 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll
@@ -10,7 +10,9 @@
 define void @za_write_vg2_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_b:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.b[w12, 0:1], { z0.b, z1.b }
 ; CHECK-NEXT:    mov za0h.b[w12, 14:15], { z0.b, z1.b }
 ; CHECK-NEXT:    ret
@@ -23,7 +25,9 @@ define void @za_write_vg2_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
 define void @za_write_vg2_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -36,7 +40,9 @@ define void @za_write_vg2_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
 define void @za_write_vg2_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -49,7 +55,9 @@ define void @za_write_vg2_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscal
 define void @za_write_vg2_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -62,7 +70,9 @@ define void @za_write_vg2_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vs
 define void @za_write_vg2_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.s[w12, 0:1], { z0.s, z1.s }
 ; CHECK-NEXT:    mov za3h.s[w12, 2:3], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -75,7 +85,9 @@ define void @za_write_vg2_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
 define void @za_write_vg2_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.s[w12, 0:1], { z0.s, z1.s }
 ; CHECK-NEXT:    mov za3h.s[w12, 2:3], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -88,7 +100,9 @@ define void @za_write_vg2_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vsca
 define void @za_write_vg2_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.d[w12, 0:1], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
@@ -98,7 +112,9 @@ define void @za_write_vg2_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
 define void @za_write_vg2_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
 ; CHECK-LABEL: za_write_vg2_horiz_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0h.d[w12, 0:1], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
@@ -110,7 +126,9 @@ define void @za_write_vg2_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsc
 define void @za_write_vg2_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_b:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.b[w12, 0:1], { z0.b, z1.b }
 ; CHECK-NEXT:    mov za0v.b[w12, 14:15], { z0.b, z1.b }
 ; CHECK-NEXT:    ret
@@ -123,7 +141,9 @@ define void @za_write_vg2_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
 define void @za_write_vg2_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -136,7 +156,9 @@ define void @za_write_vg2_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
 define void @za_write_vg2_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -149,7 +171,9 @@ define void @za_write_vg2_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale
 define void @za_write_vg2_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.h[w12, 0:1], { z0.h, z1.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 6:7], { z0.h, z1.h }
 ; CHECK-NEXT:    ret
@@ -162,7 +186,9 @@ define void @za_write_vg2_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vsc
 define void @za_write_vg2_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.s[w12, 0:1], { z0.s, z1.s }
 ; CHECK-NEXT:    mov za3v.s[w12, 2:3], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -175,7 +201,9 @@ define void @za_write_vg2_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
 define void @za_write_vg2_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.s[w12, 0:1], { z0.s, z1.s }
 ; CHECK-NEXT:    mov za3v.s[w12, 2:3], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -188,7 +216,9 @@ define void @za_write_vg2_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscal
 define void @za_write_vg2_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.d[w12, 0:1], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2)
@@ -198,7 +228,9 @@ define void @za_write_vg2_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
 define void @za_write_vg2_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2) {
 ; CHECK-LABEL: za_write_vg2_vert_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za0v.d[w12, 0:1], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2)
@@ -214,7 +246,11 @@ define void @za_write_vg2_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsca
 define void @za_write_vg4_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_b:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.b[w12, 0:3], { z0.b - z3.b }
 ; CHECK-NEXT:    mov za0h.b[w12, 12:15], { z0.b - z3.b }
 ; CHECK-NEXT:    ret
@@ -227,7 +263,11 @@ define void @za_write_vg4_horiz_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
 define void @za_write_vg4_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -240,7 +280,11 @@ define void @za_write_vg4_horiz_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
 define void @za_write_vg4_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -253,7 +297,11 @@ define void @za_write_vg4_horiz_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscal
 define void @za_write_vg4_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1h.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -266,7 +314,11 @@ define void @za_write_vg4_horiz_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vs
 define void @za_write_vg4_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.s[w12, 0:3], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
@@ -276,7 +328,11 @@ define void @za_write_vg4_horiz_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
 define void @za_write_vg4_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.s[w12, 0:3], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
@@ -286,7 +342,11 @@ define void @za_write_vg4_horiz_f32(i32 %slice, <vscale x 4 x float> %zn1, <vsca
 define void @za_write_vg4_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.d[w12, 0:3], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
@@ -296,7 +356,11 @@ define void @za_write_vg4_horiz_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
 define void @za_write_vg4_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
 ; CHECK-LABEL: za_write_vg4_horiz_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0h.d[w12, 0:3], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
@@ -308,7 +372,11 @@ define void @za_write_vg4_horiz_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsc
 define void @za_write_vg4_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_b:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.b[w12, 0:3], { z0.b - z3.b }
 ; CHECK-NEXT:    mov za0v.b[w12, 12:15], { z0.b - z3.b }
 ; CHECK-NEXT:    ret
@@ -321,7 +389,11 @@ define void @za_write_vg4_vert_b(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x
 define void @za_write_vg4_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -334,7 +406,11 @@ define void @za_write_vg4_vert_h(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x
 define void @za_write_vg4_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -347,7 +423,11 @@ define void @za_write_vg4_vert_f16(i32 %slice, <vscale x 8 x half> %zn1, <vscale
 define void @za_write_vg4_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.h[w12, 0:3], { z0.h - z3.h }
 ; CHECK-NEXT:    mov za1v.h[w12, 4:7], { z0.h - z3.h }
 ; CHECK-NEXT:    ret
@@ -360,7 +440,11 @@ define void @za_write_vg4_vert_bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vsc
 define void @za_write_vg4_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.s[w12, 0:3], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 0, i32 %slice, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
@@ -370,7 +454,11 @@ define void @za_write_vg4_vert_s(i32 %slice, <vscale x 4 x i32> %zn1, <vscale x
 define void @za_write_vg4_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.s[w12, 0:3], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 0, i32 %slice, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3, <vscale x 4 x float> %zn4)
@@ -380,7 +468,11 @@ define void @za_write_vg4_vert_f32(i32 %slice, <vscale x 4 x float> %zn1, <vscal
 define void @za_write_vg4_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.d[w12, 0:3], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 0, i32 %slice, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
@@ -390,7 +482,11 @@ define void @za_write_vg4_vert_d(i32 %slice, <vscale x 2 x i64> %zn1, <vscale x
 define void @za_write_vg4_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) {
 ; CHECK-LABEL: za_write_vg4_vert_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w12, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za0v.d[w12, 0:3], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 0, i32 %slice, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
@@ -404,7 +500,9 @@ define void @za_write_vg4_vert_f64(i32 %slice, <vscale x 2 x double> %zn1, <vsca
 define void @za_write_vg1x2_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_b:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -417,7 +515,9 @@ define void @za_write_vg1x2_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16
 define void @za_write_vg1x2_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -430,7 +530,9 @@ define void @za_write_vg1x2_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x
 define void @za_write_vg1x2_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -443,7 +545,9 @@ define void @za_write_vg1x2_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x
 define void @za_write_vg1x2_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -456,7 +560,9 @@ define void @za_write_vg1x2_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale
 define void @za_write_vg1x2_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -469,7 +575,9 @@ define void @za_write_vg1x2_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x
 define void @za_write_vg1x2_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -482,7 +590,9 @@ define void @za_write_vg1x2_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x
 define void @za_write_vg1x2_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -495,7 +605,9 @@ define void @za_write_vg1x2_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x
 define void @za_write_vg1x2_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2) {
 ; CHECK-LABEL: za_write_vg1x2_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -512,7 +624,11 @@ define void @za_write_vg1x2_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale
 define void @za_write_vg1x4_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16 x i8> %za2, <vscale x 16 x i8> %za3, <vscale x 16 x i8> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_b:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -525,7 +641,11 @@ define void @za_write_vg1x4_b(i32 %slice, <vscale x 16 x i8> %za1, <vscale x 16
 define void @za_write_vg1x4_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x i16> %za2, <vscale x 8 x i16> %za3, <vscale x 8 x i16> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -538,7 +658,11 @@ define void @za_write_vg1x4_h(i32 %slice, <vscale x 8 x i16> %za1, <vscale x 8 x
 define void @za_write_vg1x4_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x 8 x half> %za2, <vscale x 8 x half> %za3, <vscale x 8 x half> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -551,7 +675,11 @@ define void @za_write_vg1x4_f16(i32 %slice, <vscale x 8 x half> %za1, <vscale x
 define void @za_write_vg1x4_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale x 8 x bfloat> %za2, <vscale x 8 x bfloat> %za3, <vscale x 8 x bfloat> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -564,7 +692,11 @@ define void @za_write_vg1x4_bf16(i32 %slice, <vscale x 8 x bfloat> %za1, <vscale
 define void @za_write_vg1x4_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x i32> %za2, <vscale x 4 x i32> %za3, <vscale x 4 x i32> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -577,7 +709,11 @@ define void @za_write_vg1x4_s(i32 %slice, <vscale x 4 x i32> %za1, <vscale x 4 x
 define void @za_write_vg1x4_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x 4 x float> %za2, <vscale x 4 x float> %za3, <vscale x 4 x float> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -590,7 +726,11 @@ define void @za_write_vg1x4_f32(i32 %slice, <vscale x 4 x float> %za1, <vscale x
 define void @za_write_vg1x4_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x i64> %za2, <vscale x 2 x i64> %za3, <vscale x 2 x i64> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_d:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -603,7 +743,11 @@ define void @za_write_vg1x4_d(i32 %slice, <vscale x 2 x i64> %za1, <vscale x 2 x
 define void @za_write_vg1x4_f64(i32 %slice, <vscale x 2 x double> %za1, <vscale x 2 x double> %za2, <vscale x 2 x double> %za3, <vscale x 2 x double> %za4) {
 ; CHECK-LABEL: za_write_vg1x4_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    mov za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
index b1ca793bac96ed..778f31194baf45 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-luti4.ll
@@ -6,6 +6,8 @@ target triple = "aarch64-linux"
 define {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>}  @test_luti4_zt_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) #0 {
 ; CHECK-LABEL: test_luti4_zt_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    luti4 { z0.b - z3.b }, zt0, { z0, z1 }
 ; CHECK-NEXT:    ret
   %res = call {<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>} @llvm.aarch64.sme.luti4.zt.x4.nxv16i8(i32 0, <vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1)

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
index cc27a8db7797d9..9d865b1e744716 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll
@@ -114,6 +114,8 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_single_x2_u64(<
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_max_single_x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmax { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -314,6 +316,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_max_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_max_single_x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmax { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -505,6 +511,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x2_u64(<v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_max_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_max_x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmax.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -563,20 +573,20 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x2_
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    smax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -588,20 +598,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    smax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -613,20 +623,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    smax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -638,20 +648,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    smax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -665,20 +675,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_max_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    umax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -690,20 +700,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_max_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    umax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -715,20 +725,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_max_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    umax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -740,20 +750,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_max_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_u64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    umax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -767,6 +777,14 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_max_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_max_x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmax.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -778,20 +796,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_max_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fmax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fmax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@@ -803,20 +821,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_max_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fmax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fmax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@@ -828,20 +846,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_max_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
 ; CHECK-LABEL: multi_vec_max_multi_x4_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fmax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fmax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@@ -855,6 +873,8 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_maxnm_single_x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -907,6 +927,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double> }  @multi_vec_maxnm_single
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_maxnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_maxnm_single_x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -974,6 +998,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_maxnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_maxnm_x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -1032,6 +1060,14 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x2_f64(
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_maxnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -1043,20 +1079,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_maxnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fmaxnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
               @llvm.aarch64.sve.fmaxnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -1067,20 +1103,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_maxnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fmaxnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
               @llvm.aarch64.sve.fmaxnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -1091,20 +1127,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_maxnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_maxnm_x4_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fmaxnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sve.fmaxnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
index 223dd355edb9cc..575bcbc919b8cc 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll
@@ -114,6 +114,8 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_single_x2_u64(<
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_min_single_x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmin { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -314,6 +316,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_min_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_min_single_x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmin { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -505,6 +511,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x2_u64(<v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_min_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_min_x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fmin.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -563,20 +573,20 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x2_
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -588,20 +598,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -613,20 +623,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -638,20 +648,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -665,20 +675,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_min_multi_x4_u8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    umin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                            <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -690,20 +700,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_min_multi_x4_u16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -715,20 +725,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_min_multi_x4_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -740,20 +750,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_min_multi_x4_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_u64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    umin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
@@ -768,6 +778,14 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_min_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_min_x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fmin.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -779,20 +797,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_min_multi_x4_f16(<vscale x 8 x half> %unused, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fmin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
@@ -804,20 +822,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_min_multi_x4_f32(<vscale x 4 x float> %unused, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fmin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
@@ -829,20 +847,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_min_multi_x4_f64(<vscale x 2 x double> %unused, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,
 ; CHECK-LABEL: multi_vec_min_multi_x4_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fmin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                             <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
@@ -856,6 +874,8 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_single_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_minnm_single_x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm)
@@ -908,6 +928,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double> }  @multi_vec_minnm_single
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_minnm_single_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vec_minnm_single_x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm)
@@ -975,6 +999,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @multi_vec_minnm_x2_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2) {
 ; CHECK-LABEL: multi_vec_minnm_x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT:    bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fminnm.x2.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2)
@@ -1033,6 +1061,14 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x2_f64(
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @multi_vec_minnm_x4_bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
 ; CHECK-NEXT:    bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>  } @llvm.aarch64.sve.fminnm.x4.nxv8bf16(<vscale x 8 x bfloat> %zdn1, <vscale x 8 x bfloat> %zdn2, <vscale x 8 x bfloat> %zdn3, <vscale x 8 x bfloat> %zdn4, <vscale x 8 x bfloat> %zm1, <vscale x 8 x bfloat> %zm2, <vscale x 8 x bfloat> %zm3, <vscale x 8 x bfloat> %zm4)
@@ -1044,20 +1080,20 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @multi_vec_minnm_x4_f16(<vscale x 8 x half> %dummy, <vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4, <vscale x 8 x half> %zm1, <vscale x 8 x half> %zm2, <vscale x 8 x half> %zm3, <vscale x 8 x half> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fminnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> }
               @llvm.aarch64.sve.fminnm.x4.nxv8f16(<vscale x 8 x half> %zdn1, <vscale x 8 x half> %zdn2, <vscale x 8 x half> %zdn3, <vscale x 8 x half> %zdn4,
@@ -1068,20 +1104,20 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @multi_vec_minnm_x4_f32(<vscale x 8 x half> %dummy, <vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4, <vscale x 4 x float> %zm1, <vscale x 4 x float> %zm2, <vscale x 4 x float> %zm3, <vscale x 4 x float> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fminnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }
               @llvm.aarch64.sve.fminnm.x4.nxv4f32(<vscale x 4 x float> %zdn1, <vscale x 4 x float> %zdn2, <vscale x 4 x float> %zdn3, <vscale x 4 x float> %zdn4,
@@ -1092,20 +1128,20 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @multi_vec_minnm_x4_f64(<vscale x 8 x half> %dummy, <vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4, <vscale x 2 x double> %zm1, <vscale x 2 x double> %zm2, <vscale x 2 x double> %zm3, <vscale x 2 x double> %zm4) {
 ; CHECK-LABEL: multi_vec_minnm_x4_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    fminnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> }
               @llvm.aarch64.sve.fminnm.x4.nxv2f64(<vscale x 2 x double> %zdn1, <vscale x 2 x double> %zdn2, <vscale x 2 x double> %zdn3, <vscale x 2 x double> %zdn4,

diff  --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
index 70baa34d884b55..d33609b6351274 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
@@ -38,7 +38,9 @@ define void @multi_vector_mul_add_single_long_vg4x1_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -51,7 +53,9 @@ define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -66,7 +70,11 @@ define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -79,7 +87,11 @@ define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -130,18 +142,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_s16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -152,18 +164,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 define void @multi_vector_mul_add_multi_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -205,8 +217,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    smlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -220,8 +232,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    smlall za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -237,8 +249,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -254,8 +266,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlall za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -302,7 +314,9 @@ define void @multi_vector_mul_add_single_long_vg4x1_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -315,7 +329,9 @@ define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -330,7 +346,11 @@ define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -343,7 +363,11 @@ define void @multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_add_single_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -394,18 +418,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_u16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -416,18 +440,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 define void @multi_vector_mul_add_multi_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -469,8 +493,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    umlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -484,8 +508,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    umlall za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -501,8 +525,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -518,8 +542,8 @@ define void @multi_vector_mul_add_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlall za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -566,7 +590,9 @@ define void @multi_vector_mul_sub_single_long_vg4x1_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -579,7 +605,9 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -594,7 +622,11 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -607,7 +639,11 @@ define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -658,18 +694,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_s16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -680,18 +716,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 define void @multi_vector_mul_sub_multi_long_vg4x4_s16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -733,8 +769,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    smlsll za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -748,8 +784,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    smlsll za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -765,8 +801,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_s8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlsll za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -782,8 +818,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_s16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_s16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    smlsll za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -830,7 +866,9 @@ define void @multi_vector_mul_sub_single_long_vg4x1_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -843,7 +881,9 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
@@ -858,7 +898,11 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, <vscale x 8
 define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -871,7 +915,11 @@ define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, <vscale x 16
 define void @multi_vector_mul_sub_single_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h
 ; CHECK-NEXT:    ret
@@ -922,18 +970,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_u16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -944,18 +992,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 define void @multi_vector_mul_sub_multi_long_vg4x4_u16(i32 %slice, <vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
+; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -997,8 +1045,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    umlsll za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -1012,8 +1060,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT:    umlsll za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
 ; CHECK-NEXT:    ret
@@ -1029,8 +1077,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u8(i32 %slice, <vscale x 16 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlsll za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -1046,8 +1094,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x4_u16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    umlsll za.d[w8, 0:3, vgx4], { z24.h - z27.h }, z5.h[0]
@@ -1068,7 +1116,9 @@ define void @multi_vector_mul_sub_lane_long_vg4x4_u16(i32 %slice, <vscale x 8 x
 define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    sumlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -1083,7 +1133,11 @@ define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, <vscal
 define void @multi_vector_mul_add_single_signed_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    sumlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -1114,8 +1168,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x2_s8(i32 %slice, <vscale
 ; CHECK-LABEL: multi_vector_mul_add_lane_signed_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    sumlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -1131,8 +1185,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x4_s8(i32 %slice, <vscale
 ; CHECK-LABEL: multi_vector_mul_add_lane_signed_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    sumlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]
@@ -1166,7 +1220,9 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x1_s8(i32 %slice, <vsc
 define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
@@ -1181,7 +1237,11 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, <vsc
 define void @multi_vector_mul_add_single_unsigned_long_vg4x4_s8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b
 ; CHECK-NEXT:    ret
@@ -1215,18 +1275,18 @@ define void @multi_vector_mul_add_multi_unsigned_long_vg4x2_u8(i32 %slice, <vsca
 define void @multi_vector_mul_add_multi_unsigned_long_vg4x4_u8(i32 %slice, <vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_unsigned_long_vg4x4_u8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z31.d, z4.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    mov z25.d, z6.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x1]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
+; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 %slice, <vscale x 16 x i8> %zn0, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zm0, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3)
   %slice.4 = add i32 %slice, 4
@@ -1255,8 +1315,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x2_s8(i32 %slice, <vscal
 ; CHECK-LABEL: multi_vector_mul_add_lane_unsigned_long_vg4x2_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z4.d, z1.d
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT:    usmlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
 ; CHECK-NEXT:    ret
@@ -1272,8 +1332,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x4_s8(i32 %slice, <vscal
 ; CHECK-LABEL: multi_vector_mul_add_lane_unsigned_long_vg4x4_s8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z27.d, z4.d
-; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    mov z26.d, z3.d
 ; CHECK-NEXT:    mov z25.d, z2.d
 ; CHECK-NEXT:    mov z24.d, z1.d
 ; CHECK-NEXT:    usmlall za.s[w8, 0:3, vgx4], { z24.b - z27.b }, z5.b[0]

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
index 20251aff99de14..e817dac3d1a6b1 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
@@ -120,7 +120,9 @@ define void @multi_vector_sub_single_vg2x1_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -133,7 +135,9 @@ define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_add_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -146,7 +150,9 @@ define void @multi_vector_add_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_add_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -159,7 +165,9 @@ define void @multi_vector_add_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -176,7 +184,9 @@ define void @multi_vector_add_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -189,7 +199,9 @@ define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -202,7 +214,9 @@ define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -215,7 +229,9 @@ define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT:    ret
@@ -232,7 +248,11 @@ define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -249,8 +269,11 @@ define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_add_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z3.d, z2.d
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    mov z3.d, z2.d
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -267,7 +290,11 @@ define void @multi_vector_add_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_add_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -284,7 +311,11 @@ define void @multi_vector_add_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -305,7 +336,11 @@ define void @multi_vector_add_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -322,7 +357,11 @@ define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat
 define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -339,7 +378,11 @@ define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, <vscale x 8 x half> %
 define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -356,7 +399,11 @@ define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h
 ; CHECK-NEXT:    ret
@@ -377,7 +424,11 @@ define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %z
 define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm0,  <vscale x 8 x bfloat> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -392,7 +443,11 @@ define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm0, <vscale x 8 x half> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -407,7 +462,11 @@ define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -422,7 +481,11 @@ define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -441,7 +504,11 @@ define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm0, <vscale x 8 x bfloat> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -456,7 +523,11 @@ define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm0, <vscale x 8 x half> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -471,7 +542,11 @@ define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -486,7 +561,11 @@ define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm0, <vscale x 8 x i16> %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
 ; CHECK-NEXT:    ret
@@ -505,7 +584,15 @@ define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -523,7 +610,15 @@ define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -541,7 +636,15 @@ define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -559,7 +662,15 @@ define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_add_multi_vg2x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -581,7 +692,15 @@ define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -599,7 +718,15 @@ define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -617,7 +744,15 @@ define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, <vscale x 8 x half> %z
 define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -635,7 +770,15 @@ define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn
 define void @multi_vector_sub_multi_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3,
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h }
 ; CHECK-NEXT:    ret
@@ -769,7 +912,9 @@ define void @multi_vector_sub_lane_vg2x1_u16(i32 %slice, <vscale x 8 x i16> %zn,
 define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -784,7 +929,9 @@ define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -799,7 +946,9 @@ define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -814,7 +963,9 @@ define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -833,7 +984,9 @@ define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -848,7 +1001,9 @@ define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -863,7 +1018,9 @@ define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -878,7 +1035,9 @@ define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
 ; CHECK-NEXT:    ret
@@ -897,7 +1056,11 @@ define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -914,7 +1077,11 @@ define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -931,7 +1098,11 @@ define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -948,7 +1119,11 @@ define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -969,7 +1144,11 @@ define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1, <vscale x 8 x half> %zn2, <vscale x 8 x half> %zn3, <vscale x 8 x half> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -986,7 +1165,11 @@ define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, <vscale x 8 x half> %zn
 define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zn3, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1003,7 +1186,11 @@ define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, <vscale x 8 x bfloat>
 define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret
@@ -1020,7 +1207,11 @@ define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, <vscale x 8 x i16> %zn0
 define void @multi_vector_sub_lane_vg2x4_u16(i32 %slice, <vscale x 8 x i16> %zn0, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0]
 ; CHECK-NEXT:    umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7]
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
index a85da1a0d799cb..13205d30258188 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
@@ -324,20 +324,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x2_s64
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    srshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    srshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
               @llvm.aarch64.sve.srshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
@@ -348,20 +348,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    srshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    srshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
               @llvm.aarch64.sve.srshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
@@ -372,20 +372,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    srshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    srshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
               @llvm.aarch64.sve.srshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
@@ -396,20 +396,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    srshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    srshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
               @llvm.aarch64.sve.srshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
@@ -484,20 +484,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_uhl_x2_u64
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    urshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    urshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
               @llvm.aarch64.sve.urshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
@@ -508,20 +508,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    urshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    urshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
               @llvm.aarch64.sve.urshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
@@ -532,20 +532,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    urshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    urshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
               @llvm.aarch64.sve.urshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
@@ -556,20 +556,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x4_u64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    urshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    urshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
               @llvm.aarch64.sve.urshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
index f0a140d3d52c38..5e94f11390a821 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll
@@ -8,6 +8,7 @@ define <vscale x 2 x i64> @test_tileslice_no_add(i32 %idx) #0 {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1
 ; CHECK-NEXT:    ret
 entry:
   %read = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %idx)
@@ -20,6 +21,7 @@ define <vscale x 2 x i64> @test_tileslice_add_nonconstant(i32 %idx1, i32 %idx2)
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    add w8, w0, w1
 ; CHECK-NEXT:    mov { z0.d, z1.d }, za.d[w8, 0, vgx2]
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1
 ; CHECK-NEXT:    ret
 entry:
   %add = add i32 %idx1, %idx2

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
index dfc23eaf8a4cea..ce9d0ca35cb629 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll
@@ -196,20 +196,20 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_mul
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_sat_double_mulh_multi_x4_s8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1b { z27.b }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    sqdmulh { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                                        <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
@@ -221,20 +221,20 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_sat_double_mulh_multi_x4_s16(<vscale x 8 x i16> %unused, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1h { z27.h }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    sqdmulh { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                                         <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
@@ -246,20 +246,20 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_sat_double_mulh_multi_x4_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1w { z27.s }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    sqdmulh { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                                         <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
@@ -271,20 +271,20 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_sat_double_mulh_multi_x4_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
 ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z30.d, z7.d
+; CHECK-NEXT:    mov z27.d, z4.d
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z26.d, z7.d
-; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    ld1d { z27.d }, p0/z, [x0]
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    sqdmulh { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d }
-; CHECK-NEXT:    mov z0.d, z4.d
-; CHECK-NEXT:    mov z1.d, z5.d
-; CHECK-NEXT:    mov z2.d, z6.d
-; CHECK-NEXT:    mov z3.d, z7.d
+; CHECK-NEXT:    mov z29.d, z6.d
+; CHECK-NEXT:    mov z26.d, z3.d
+; CHECK-NEXT:    mov z28.d, z5.d
+; CHECK-NEXT:    mov z25.d, z2.d
+; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
+; CHECK-NEXT:    mov z24.d, z1.d
+; CHECK-NEXT:    sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
+; CHECK-NEXT:    mov z0.d, z24.d
+; CHECK-NEXT:    mov z1.d, z25.d
+; CHECK-NEXT:    mov z2.d, z26.d
+; CHECK-NEXT:    mov z3.d, z27.d
 ; CHECK-NEXT:    ret
                                         <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
index 7387f889d8b26e..c2865990faab6f 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll
@@ -8,7 +8,9 @@
 define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,  <vscale x 4 x i32> %zm) {
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s
 ; CHECK-NEXT:    ret
@@ -25,7 +27,9 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, <vscale x 4
 define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,  <vscale x 2 x i64> %zm) {
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d
 ; CHECK-NEXT:    ret
@@ -46,7 +50,11 @@ define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, <vscale x 2
 define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s
 ; CHECK-NEXT:    ret
@@ -67,7 +75,11 @@ define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, <vscale x 4
 define void @multi_vector_sub_write_single_za_vg1x4_i64(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d
 ; CHECK-NEXT:    ret
@@ -93,7 +105,11 @@ define void @multi_vector_sub_write_single_za_vg1x4_i64(i32 %slice,
 define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s }
 ; CHECK-NEXT:    ret
@@ -112,7 +128,11 @@ define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32>
 define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d }
 ; CHECK-NEXT:    ret
@@ -135,7 +155,15 @@ define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64>
 define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s }
 ; CHECK-NEXT:    ret
@@ -159,7 +187,15 @@ define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, <vscale x 4 x i32>
 define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1,
 ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d }
 ; CHECK-NEXT:    ret
@@ -189,7 +225,9 @@ define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, <vscale x 2 x i64>
 define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -202,7 +240,9 @@ define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, <vscale x 4 x i32> %zn0,
 define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0, <vscale x 2 x i64> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -215,7 +255,9 @@ define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, <vscale x 2 x i64> %zn0,
 define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fsub za.s[w8, 0, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    fsub za.s[w8, 7, vgx2], { z0.s, z1.s }
 ; CHECK-NEXT:    ret
@@ -230,7 +272,9 @@ define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, <vscale x 4 x float> %zn0
 define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn0, <vscale x 2 x double> %zn1) {
 ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fsub za.d[w8, 0, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    fsub za.d[w8, 7, vgx2], { z0.d, z1.d }
 ; CHECK-NEXT:    ret
@@ -247,7 +291,11 @@ define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, <vscale x 2 x double> %zn
 define void @multi_vector_sub_za_vg1x4_i32(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.s[w8, 0, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    sub za.s[w8, 7, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
@@ -266,7 +314,11 @@ define void @multi_vector_sub_za_vg1x4_i32(i32 %slice,
 define void @multi_vector_sub_za_vg1x4_i64(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sub za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    sub za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret
@@ -285,7 +337,11 @@ define void @multi_vector_sub_za_vg1x4_i64(i32 %slice,
 define void @multi_vector_sub_za_vg1x4_f32(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fsub za.s[w8, 0, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    fsub za.s[w8, 7, vgx4], { z0.s - z3.s }
 ; CHECK-NEXT:    ret
@@ -304,7 +360,11 @@ define void @multi_vector_sub_za_vg1x4_f32(i32 %slice,
 define void @multi_vector_sub_za_vg1x4_f64(i32 %slice,
 ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fsub za.d[w8, 0, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    fsub za.d[w8, 7, vgx4], { z0.d - z3.d }
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
index c83206f4d57891..49106e12378bea 100644
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll
@@ -19,7 +19,7 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, <vscale x 8 x half>
 
 ; == BFVDOT ==
 
-define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm) #1 {
+define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, <vscale x 8 x bfloat> %zn1, <vscale x 8 x bfloat> %zn2, <vscale x 8 x bfloat> %zm) {
 ; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -35,7 +35,7 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, <vscale x 8 x bfloa
 
 ; == SVDOT ==
 
-define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) #1 {
+define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: test_svdot_lane_za32_vg1x2_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -48,7 +48,7 @@ define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %
   ret void
 }
 
-define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #1 {
+define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: test_svdot_lane_za32_vg1x4_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -61,7 +61,7 @@ define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %
   ret void
 }
 
-define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) #1 {
+define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: test_svdot_lane_za64_vg1x4_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -151,7 +151,7 @@ entry:
 
 ; == UVDOT ==
 
-define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) #1 {
+define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: test_uvdot_lane_za32_vg1x2_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -164,7 +164,7 @@ define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, <vscale x 8 x i16> %
   ret void
 }
 
-define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #1 {
+define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: test_uvdot_lane_za32_vg1x4_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -177,7 +177,7 @@ define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %
   ret void
 }
 
-define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) #1 {
+define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, <vscale x 8 x i16> %zn1, <vscale x 8 x i16> %zn2, <vscale x 8 x i16> %zn3, <vscale x 8 x i16> %zn4, <vscale x 8 x i16> %zm) {
 ; CHECK-LABEL: test_uvdot_lane_za64_vg1x4_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -267,7 +267,7 @@ entry:
 
 ; == SUVDOT ==
 
-define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #1 {
+define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: test_suvdot_lane_za32_vg1x4_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0
@@ -332,7 +332,7 @@ entry:
 
 ; == USVDOT ==
 
-define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) #1 {
+define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2, <vscale x 16 x i8> %zn3, <vscale x 16 x i8> %zn4, <vscale x 16 x i8> %zm) {
 ; CHECK-LABEL: test_usvdot_lane_za32_vg1x4_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, w0

diff --git a/llvm/test/CodeGen/AArch64/smul_fix_sat.ll b/llvm/test/CodeGen/AArch64/smul_fix_sat.ll
index d72798936b7c80..c2d8d34b9305a3 100644
--- a/llvm/test/CodeGen/AArch64/smul_fix_sat.ll
+++ b/llvm/test/CodeGen/AArch64/smul_fix_sat.ll
@@ -146,6 +146,8 @@ define i64 @func8(i64 %x, i64 %y) nounwind {
 define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-LABEL: vec:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w9, v1.s[1]
 ; CHECK-NEXT:    mov w10, v0.s[1]
 ; CHECK-NEXT:    mov w8, #-2147483648 // =0x80000000
@@ -165,6 +167,7 @@ define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-NEXT:    csel w8, w8, w9, ne
 ; CHECK-NEXT:    fmov s0, w8
 ; CHECK-NEXT:    mov v0.s[1], w11
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp = call <2 x i32> @llvm.smul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> %y, i32 0)
   ret <2 x i32> %tmp

diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir
index 2bf1393c888b5b..b1e7ebe3a7e82b 100644
--- a/llvm/test/CodeGen/AArch64/spill-fold.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fold.mir
@@ -113,8 +113,8 @@ body:             |
     liveins: $q0
     %0 = COPY $q0
     INLINEASM &nop, 1, 12, implicit-def dead $d0, 12, implicit-def dead $d1, 12, implicit-def dead $d2, 12, implicit-def dead $d3, 12, implicit-def dead $d4, 12, implicit-def dead $d5, 12, implicit-def dead $d6, 12, implicit-def dead $d7, 12, implicit-def dead $d8, 12, implicit-def dead $d9, 12, implicit-def dead $d10, 12, implicit-def dead $d11, 12, implicit-def dead $d12, 12, implicit-def dead $d13, 12, implicit-def dead $d14, 12, implicit-def dead $d15, 12, implicit-def dead $d16, 12, implicit-def dead $d17, 12, implicit-def dead $d18, 12, implicit-def dead $d19, 12, implicit-def dead $d20, 12, implicit-def dead $d21, 12, implicit-def dead $d22, 12, implicit-def dead $d23, 12, implicit-def dead $d24, 12, implicit-def dead $d25, 12, implicit-def dead $d26, 12, implicit-def dead $d27, 12, implicit-def dead $d28, 12, implicit-def dead $d29, 12, implicit-def dead $d30, 12, implicit-def $d31
-    ; CHECK:      %3:fpr128 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
-    ; CHECK-NEXT: $s0 = COPY %3.ssub
+    ; CHECK:      %1:fpr128 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
+    ; CHECK-NEXT: $s0 = COPY %1.ssub
     $s0 = COPY %0.ssub
     RET_ReallyLR implicit $s0
 ...

diff --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll b/llvm/test/CodeGen/AArch64/split-vector-insert.ll
index 3fffb25704fe3f..b8ab9a00c69810 100644
--- a/llvm/test/CodeGen/AArch64/split-vector-insert.ll
+++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll
@@ -21,6 +21,7 @@ define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %
 ; CHECK-LEGALIZATION-NEXT:    ptrue p0.d, vl2
 ; CHECK-LEGALIZATION-NEXT:    mov w9, #2 // =0x2
 ; CHECK-LEGALIZATION-NEXT:    sub x8, x8, #2
+; CHECK-LEGALIZATION-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-LEGALIZATION-NEXT:    mov x10, sp
 ; CHECK-LEGALIZATION-NEXT:    cmp x8, #2
 ; CHECK-LEGALIZATION-NEXT:    mov z0.d, p0/m, z1.d
@@ -64,6 +65,7 @@ define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov w9, #2 // =0x2
 ; CHECK-NEXT:    sub x8, x8, #2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov x10, sp
 ; CHECK-NEXT:    cmp x8, #2
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
@@ -116,6 +118,7 @@ define <vscale x 2 x double> @test_nxv2f64_v8f64(<vscale x 2 x double> %a, <8 x
 ; CHECK-LEGALIZATION-NEXT:    ptrue p0.d, vl2
 ; CHECK-LEGALIZATION-NEXT:    mov w9, #2 // =0x2
 ; CHECK-LEGALIZATION-NEXT:    sub x8, x8, #2
+; CHECK-LEGALIZATION-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-LEGALIZATION-NEXT:    mov x10, sp
 ; CHECK-LEGALIZATION-NEXT:    cmp x8, #2
 ; CHECK-LEGALIZATION-NEXT:    mov z0.d, p0/m, z1.d
@@ -159,6 +162,7 @@ define <vscale x 2 x double> @test_nxv2f64_v8f64(<vscale x 2 x double> %a, <8 x
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov w9, #2 // =0x2
 ; CHECK-NEXT:    sub x8, x8, #2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov x10, sp
 ; CHECK-NEXT:    cmp x8, #2
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d

diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
index b009b6b990ebeb..b165ac0d56d20f 100644
--- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
@@ -302,6 +302,7 @@ define <2 x i64> @fold_srem_v2i64(<2 x i64> %x) {
 define <1 x i64> @fold_srem_v1i64(<1 x i64> %x) {
 ; CHECK-LABEL: fold_srem_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mov x8, #7378697629483820646 // =0x6666666666666666
 ; CHECK-NEXT:    movk x8, #26215

diff --git a/llvm/test/CodeGen/AArch64/store.ll b/llvm/test/CodeGen/AArch64/store.ll
index 707b95a0514241..86d74b69f4958f 100644
--- a/llvm/test/CodeGen/AArch64/store.ll
+++ b/llvm/test/CodeGen/AArch64/store.ll
@@ -109,6 +109,7 @@ define void @store_v2i64(<2 x i64> %a, ptr %ptr){
 define void @store_v2i8(<2 x i8> %a, ptr %ptr){
 ; CHECK-SD-LABEL: store_v2i8:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-NEXT:    fmov w9, s0
 ; CHECK-SD-NEXT:    strb w9, [x0]
@@ -117,6 +118,7 @@ define void @store_v2i8(<2 x i8> %a, ptr %ptr){
 ;
 ; CHECK-GI-LABEL: store_v2i8:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    str b0, [x0]
 ; CHECK-GI-NEXT:    str b1, [x0, #1]
@@ -147,6 +149,7 @@ define void @store_v32i8(<32 x i8> %a, ptr %ptr){
 define void @store_v2i16(<2 x i16> %a, ptr %ptr){
 ; CHECK-SD-LABEL: store_v2i16:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    mov w8, v0.s[1]
 ; CHECK-SD-NEXT:    fmov w9, s0
 ; CHECK-SD-NEXT:    strh w9, [x0]
@@ -155,6 +158,7 @@ define void @store_v2i16(<2 x i16> %a, ptr %ptr){
 ;
 ; CHECK-GI-LABEL: store_v2i16:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    str h0, [x0]
 ; CHECK-GI-NEXT:    str h1, [x0, #2]
@@ -173,10 +177,16 @@ define void @store_v16i16(<16 x i16> %a, ptr %ptr){
 }
 
 define void @store_v1i32(<1 x i32> %a, ptr %ptr){
-; CHECK-LABEL: store_v1i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str s0, [x0]
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: store_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    str s0, [x0]
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: store_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    str s0, [x0]
+; CHECK-GI-NEXT:    ret
     store <1 x i32> %a, ptr %ptr
     ret void
 }
@@ -232,6 +242,7 @@ define void @store_v7i8(<7 x i8> %a, ptr %ptr){
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    add x8, x0, #6
 ; CHECK-SD-NEXT:    add x9, x0, #4
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str s0, [x0]
 ; CHECK-SD-NEXT:    st1 { v0.b }[6], [x8]
 ; CHECK-SD-NEXT:    st1 { v0.h }[2], [x9]
@@ -240,6 +251,7 @@ define void @store_v7i8(<7 x i8> %a, ptr %ptr){
 ; CHECK-GI-LABEL: store_v7i8:
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add x8, x0, #1
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    add x9, x0, #2
 ; CHECK-GI-NEXT:    st1 { v0.b }[0], [x0]
 ; CHECK-GI-NEXT:    st1 { v0.b }[1], [x8]
@@ -261,6 +273,7 @@ define void @store_v3i16(<3 x i16> %a, ptr %ptr){
 ; CHECK-SD-LABEL: store_v3i16:
 ; CHECK-SD:       // %bb.0:
 ; CHECK-SD-NEXT:    add x8, x0, #4
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    str s0, [x0]
 ; CHECK-SD-NEXT:    st1 { v0.h }[2], [x8]
 ; CHECK-SD-NEXT:    ret
@@ -269,6 +282,7 @@ define void @store_v3i16(<3 x i16> %a, ptr %ptr){
 ; CHECK-GI:       // %bb.0:
 ; CHECK-GI-NEXT:    add x8, x0, #2
 ; CHECK-GI-NEXT:    add x9, x0, #4
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    str h0, [x0]
 ; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
 ; CHECK-GI-NEXT:    st1 { v0.h }[2], [x9]

diff --git a/llvm/test/CodeGen/AArch64/sub.ll b/llvm/test/CodeGen/AArch64/sub.ll
index 44be83b1578234..8f35a69f52b85b 100644
--- a/llvm/test/CodeGen/AArch64/sub.ll
+++ b/llvm/test/CodeGen/AArch64/sub.ll
@@ -399,14 +399,19 @@ define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
 ;
 ; CHECK-GI-LABEL: v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
-; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
+; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
 ; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    fmov x9, d5
-; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v3.2d
+; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
 ; CHECK-GI-NEXT:    sub x8, x8, x9
 ; CHECK-GI-NEXT:    fmov d2, x8
+; CHECK-GI-NEXT:    sub v0.2d, v0.2d, v3.2d
 ; CHECK-GI-NEXT:    mov d1, v0.d[1]
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %s = sub <3 x i64> %d, %e

diff --git a/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll b/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll
index dd9e462b859054..d04dc500287653 100644
--- a/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cntp-combine-i32.ll
@@ -8,7 +8,9 @@ target triple = "aarch64-unknown-linux-gnu"
 define i32 @cntp_add_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_all_active_nxv16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.b
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
@@ -20,7 +22,9 @@ define i32 @cntp_add_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 define i32 @cntp_add_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_all_active_nxv8i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
@@ -32,7 +36,9 @@ define i32 @cntp_add_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 define i32 @cntp_add_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_all_active_nxv4i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
@@ -44,7 +50,9 @@ define i32 @cntp_add_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 define i32 @cntp_add_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_all_active_nxv2i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
@@ -56,7 +64,9 @@ define i32 @cntp_add_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 define i32 @cntp_add_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_all_active_nxv8i1_via_cast:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
@@ -87,7 +97,9 @@ define i64 @cntp_add_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1
 define i32 @cntp_add_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_same_active_nxv16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.b
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -98,7 +110,9 @@ define i32 @cntp_add_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 define i32 @cntp_add_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_same_active_nxv8i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -109,7 +123,9 @@ define i32 @cntp_add_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 define i32 @cntp_add_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_same_active_nxv4i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -120,7 +136,9 @@ define i32 @cntp_add_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 define i32 @cntp_add_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_add_same_active_nxv2i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incp x0, p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -149,7 +167,9 @@ define i64 @cntp_add_same_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i
 define i32 @cntp_sub_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_all_active_nxv16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.b
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
@@ -161,7 +181,9 @@ define i32 @cntp_sub_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 define i32 @cntp_sub_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_all_active_nxv8i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
@@ -173,7 +195,9 @@ define i32 @cntp_sub_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 define i32 @cntp_sub_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_all_active_nxv4i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
@@ -185,7 +209,9 @@ define i32 @cntp_sub_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 define i32 @cntp_sub_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_all_active_nxv2i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
@@ -197,7 +223,9 @@ define i32 @cntp_sub_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 define i32 @cntp_sub_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_all_active_nxv8i1_via_cast:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
@@ -228,7 +256,9 @@ define i64 @cntp_sub_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1
 define i32 @cntp_sub_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_same_active_nxv16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.b
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -239,7 +269,9 @@ define i32 @cntp_sub_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
 define i32 @cntp_sub_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_same_active_nxv8i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -250,7 +282,9 @@ define i32 @cntp_sub_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
 define i32 @cntp_sub_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_same_active_nxv4i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
   %2 = trunc i64 %1 to i32
@@ -261,7 +295,9 @@ define i32 @cntp_sub_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
 define i32 @cntp_sub_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
 ; CHECK-LABEL: cntp_sub_same_active_nxv2i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decp x0, p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
   %2 = trunc i64 %1 to i32

diff --git a/llvm/test/CodeGen/AArch64/sve-doublereduct.ll b/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
index 9100624fb62411..7bc31d44bb6547 100644
--- a/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
+++ b/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
@@ -8,6 +8,7 @@ define float @add_f32(<vscale x 8 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fadd z0.s, z0.s, z2.s
 ; CHECK-NEXT:    faddv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fadd.f32.nxv8f32(float -0.0, <vscale x 8 x float> %a)
   %r2 = call fast float @llvm.vector.reduce.fadd.f32.nxv4f32(float -0.0, <vscale x 4 x float> %b)
@@ -29,6 +30,7 @@ define float @fmin_f32(<vscale x 8 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z2.s
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float> %a)
   %r2 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %b)
@@ -43,6 +45,7 @@ define float @fmax_f32(<vscale x 8 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z2.s
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fmax.nxv8f32(<vscale x 8 x float> %a)
   %r2 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %b)
@@ -89,6 +92,7 @@ define i32 @add_i32(<vscale x 8 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-NEXT:    add z0.s, z0.s, z2.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %r1 = call i32 @llvm.vector.reduce.add.i32.nxv8i32(<vscale x 8 x i32> %a)
   %r2 = call i32 @llvm.vector.reduce.add.i32.nxv4i32(<vscale x 4 x i32> %b)
@@ -109,6 +113,7 @@ define i16 @add_ext_i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %ae = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
   %be = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
@@ -135,6 +140,7 @@ define i16 @add_ext_v32i16(<vscale x 32 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %ae = zext <vscale x 32 x i8> %a to <vscale x 32 x i16>
   %be = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>

diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
index 399490e270f901..6d4f5963881e58 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -118,6 +118,7 @@ define i64 @test_lane2_2xi64(<vscale x 2 x i64> %a) #0 {
 define half @test_lane0_8xf16(<vscale x 8 x half> %a) #0 {
 ; CHECK-LABEL: test_lane0_8xf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x half> %a, i32 0
   ret half %b
@@ -127,6 +128,7 @@ define half @test_lane7_8xf16(<vscale x 8 x half> %a) #0 {
 ; CHECK-LABEL: test_lane7_8xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x half> %a, i32 7
   ret half %b
@@ -136,6 +138,7 @@ define half @test_lane8_8xf16(<vscale x 8 x half> %a) #0 {
 ; CHECK-LABEL: test_lane8_8xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, z0.h[8]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x half> %a, i32 8
   ret half %b
@@ -144,6 +147,7 @@ define half @test_lane8_8xf16(<vscale x 8 x half> %a) #0 {
 define half @test_lane0_4xf16(<vscale x 4 x half> %a) #0 {
 ; CHECK-LABEL: test_lane0_4xf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x half> %a, i32 0
   ret half %b
@@ -153,6 +157,7 @@ define half @test_lane3_4xf16(<vscale x 4 x half> %a) #0 {
 ; CHECK-LABEL: test_lane3_4xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x half> %a, i32 3
   ret half %b
@@ -162,6 +167,7 @@ define half @test_lane4_4xf16(<vscale x 4 x half> %a) #0 {
 ; CHECK-LABEL: test_lane4_4xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[4]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x half> %a, i32 4
   ret half %b
@@ -170,6 +176,7 @@ define half @test_lane4_4xf16(<vscale x 4 x half> %a) #0 {
 define half @test_lane0_2xf16(<vscale x 2 x half> %a) #0 {
 ; CHECK-LABEL: test_lane0_2xf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x half> %a, i32 0
   ret half %b
@@ -179,6 +186,7 @@ define half @test_lane1_2xf16(<vscale x 2 x half> %a) #0 {
 ; CHECK-LABEL: test_lane1_2xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x half> %a, i32 1
   ret half %b
@@ -188,6 +196,7 @@ define half @test_lane2_2xf16(<vscale x 2 x half> %a) #0 {
 ; CHECK-LABEL: test_lane2_2xf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[2]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x half> %a, i32 2
   ret half %b
@@ -196,6 +205,7 @@ define half @test_lane2_2xf16(<vscale x 2 x half> %a) #0 {
 define bfloat @test_lane0_8xbf16(<vscale x 8 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane0_8xbf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x bfloat> %a, i32 0
   ret bfloat %b
@@ -205,6 +215,7 @@ define bfloat @test_lane7_8xbf16(<vscale x 8 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane7_8xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x bfloat> %a, i32 7
   ret bfloat %b
@@ -214,6 +225,7 @@ define bfloat @test_lane8_8xbf16(<vscale x 8 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane8_8xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, z0.h[8]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 8 x bfloat> %a, i32 8
   ret bfloat %b
@@ -222,6 +234,7 @@ define bfloat @test_lane8_8xbf16(<vscale x 8 x bfloat> %a) #0 {
 define bfloat @test_lane0_4xbf16(<vscale x 4 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane0_4xbf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x bfloat> %a, i32 0
   ret bfloat %b
@@ -231,6 +244,7 @@ define bfloat @test_lane3_4xbf16(<vscale x 4 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane3_4xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x bfloat> %a, i32 3
   ret bfloat %b
@@ -240,6 +254,7 @@ define bfloat @test_lane4_4xbf16(<vscale x 4 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane4_4xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[4]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x bfloat> %a, i32 4
   ret bfloat %b
@@ -248,6 +263,7 @@ define bfloat @test_lane4_4xbf16(<vscale x 4 x bfloat> %a) #0 {
 define bfloat @test_lane0_2xbf16(<vscale x 2 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane0_2xbf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x bfloat> %a, i32 0
   ret bfloat %b
@@ -257,6 +273,7 @@ define bfloat @test_lane1_2xbf16(<vscale x 2 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane1_2xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x bfloat> %a, i32 1
   ret bfloat %b
@@ -266,6 +283,7 @@ define bfloat @test_lane2_2xbf16(<vscale x 2 x bfloat> %a) #0 {
 ; CHECK-LABEL: test_lane2_2xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[2]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x bfloat> %a, i32 2
   ret bfloat %b
@@ -274,6 +292,7 @@ define bfloat @test_lane2_2xbf16(<vscale x 2 x bfloat> %a) #0 {
 define float @test_lane0_4xf32(<vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: test_lane0_4xf32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x float> %a, i32 0
   ret float %b
@@ -283,6 +302,7 @@ define float @test_lane3_4xf32(<vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: test_lane3_4xf32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x float> %a, i32 3
   ret float %b
@@ -292,6 +312,7 @@ define float @test_lane4_4xf32(<vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: test_lane4_4xf32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, z0.s[4]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 4 x float> %a, i32 4
   ret float %b
@@ -300,6 +321,7 @@ define float @test_lane4_4xf32(<vscale x 4 x float> %a) #0 {
 define float @test_lane0_2xf32(<vscale x 2 x float> %a) #0 {
 ; CHECK-LABEL: test_lane0_2xf32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x float> %a, i32 0
   ret float %b
@@ -309,6 +331,7 @@ define float @test_lane1_2xf32(<vscale x 2 x float> %a) #0 {
 ; CHECK-LABEL: test_lane1_2xf32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x float> %a, i32 1
   ret float %b
@@ -318,6 +341,7 @@ define float @test_lane2_2xf32(<vscale x 2 x float> %a) #0 {
 ; CHECK-LABEL: test_lane2_2xf32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[2]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x float> %a, i32 2
   ret float %b
@@ -326,6 +350,7 @@ define float @test_lane2_2xf32(<vscale x 2 x float> %a) #0 {
 define double @test_lane0_2xf64(<vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: test_lane0_2xf64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x double> %a, i32 0
   ret double %b
@@ -335,6 +360,7 @@ define double @test_lane1_2xf64(<vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: test_lane1_2xf64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x double> %a, i32 1
   ret double %b
@@ -344,6 +370,7 @@ define double @test_lane2_2xf64(<vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: test_lane2_2xf64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, z0.d[2]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %b = extractelement <vscale x 2 x double> %a, i32 2
   ret double %b

diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
index 6bce1050bbc811..5b7522856e2daf 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
@@ -85,6 +85,7 @@ define <2 x i16> @extract_v2i16_nxv32i16_8(<vscale x 32 x i16> %arg) {
 ; CHECK-NEXT:    addvl x8, sp, #4
 ; CHECK-NEXT:    add x8, x8, #34
 ; CHECK-NEXT:    ld1 { v0.h }[2], [x8]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    addvl sp, sp, #8
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -143,6 +144,7 @@ define <2 x float> @extract_v2f32_nxv16f32_2(<vscale x 16 x float> %arg) {
 ; CHECK-LABEL: extract_v2f32_nxv16f32_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <2 x float> @llvm.vector.extract.v2f32.nxv16f32(<vscale x 16 x float> %arg, i64 2)
   ret <2 x float> %ext
@@ -159,6 +161,7 @@ define <4 x i1> @extract_v4i1_nxv32i1_0(<vscale x 32 x i1> %arg) {
 ; CHECK-NEXT:    umov w8, v1.b[3]
 ; CHECK-NEXT:    mov v0.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ext = call <4 x i1> @llvm.vector.extract.v4i1.nxv32i1(<vscale x 32 x i1> %arg, i64 0)
   ret <4 x i1> %ext
@@ -196,6 +199,7 @@ define <4 x i1> @extract_v4i1_nxv32i1_16(<vscale x 32 x i1> %arg) {
 ; CHECK-NEXT:    addvl x8, sp, #6
 ; CHECK-NEXT:    add x8, x8, #19
 ; CHECK-NEXT:    ld1 { v0.b }[6], [x8]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    addvl sp, sp, #8
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -214,6 +218,7 @@ define <4 x i1> @extract_v4i1_v32i1_16(<32 x i1> %arg) {
 ; CHECK-NEXT:    mov v0.h[2], w8
 ; CHECK-NEXT:    ldr w8, [sp, #88]
 ; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ext = call <4 x i1> @llvm.vector.extract.v4i1.v32i1(<32 x i1> %arg, i64 16)
   ret <4 x i1> %ext
@@ -249,6 +254,7 @@ define <4 x i3> @extract_v4i3_nxv32i3_16(<vscale x 32 x i3> %arg) {
 ; CHECK-NEXT:    addvl x8, sp, #6
 ; CHECK-NEXT:    add x8, x8, #19
 ; CHECK-NEXT:    ld1 { v0.b }[6], [x8]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    addvl sp, sp, #8
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -262,6 +268,7 @@ define <2 x i32> @extract_v2i32_nxv16i32_2(<vscale x 16 x i32> %arg) {
 ; CHECK-LABEL: extract_v2i32_nxv16i32_2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %arg, i64 2)
   ret <2 x i32> %ext
@@ -277,6 +284,7 @@ define <4 x i64> @extract_v4i64_nxv8i64_0(<vscale x 8 x i64> %arg) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #1, mul vl]
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ldr q1, [sp, #16]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -290,6 +298,7 @@ define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 0)
   ret <4 x half> %ext
@@ -301,6 +310,7 @@ define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 4)
   ret <4 x half> %ext
@@ -312,6 +322,7 @@ define <2 x half> @extract_v2f16_nxv4f16_2(<vscale x 4 x half> %arg) {
 ; CHECK-NEXT:    mov z1.s, z0.s[3]
 ; CHECK-NEXT:    mov z0.s, z0.s[2]
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half> %arg, i64 2)
   ret <2 x half> %ext
@@ -323,6 +334,7 @@ define <2 x half> @extract_v2f16_nxv4f16_6(<vscale x 4 x half> %arg) {
 ; CHECK-NEXT:    mov z1.s, z0.s[7]
 ; CHECK-NEXT:    mov z0.s, z0.s[6]
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %ext = call <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half> %arg, i64 6)
   ret <2 x half> %ext

diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 06ca0b4a4b7601..518e3573b5edd3 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -5,6 +5,7 @@
 define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
 ; CHECK-LABEL: extract_v2i64_nxv2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 0)
   ret <2 x i64> %retval
@@ -15,6 +16,7 @@ define <2 x i64> @extract_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec) nounwind {
 ; CHECK-LABEL: extract_v2i64_nxv2i64_idx2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
   ret <2 x i64> %retval
@@ -24,6 +26,7 @@ define <2 x i64> @extract_v2i64_nxv2i64_idx2(<vscale x 2 x i64> %vec) nounwind {
 define <4 x i32> @extract_v4i32_nxv4i32(<vscale x 4 x i32> %vec) nounwind {
 ; CHECK-LABEL: extract_v4i32_nxv4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
   ret <4 x i32> %retval
@@ -34,6 +37,7 @@ define <4 x i32> @extract_v4i32_nxv4i32_idx4(<vscale x 4 x i32> %vec) nounwind {
 ; CHECK-LABEL: extract_v4i32_nxv4i32_idx4:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 4)
   ret <4 x i32> %retval
@@ -44,6 +48,7 @@ define <4 x i32> @extract_v4i32_nxv2i32(<vscale x 2 x i32> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v4i32_nxv2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %vec, i64 0)
   ret <4 x i32> %retval
@@ -55,6 +60,7 @@ define <4 x i32> @extract_v4i32_nxv2i32_idx4(<vscale x 2 x i32> %vec) nounwind #
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #32
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <4 x i32> @llvm.vector.extract.v4i32.nxv2i32(<vscale x 2 x i32> %vec, i64 4)
   ret <4 x i32> %retval
@@ -64,6 +70,7 @@ define <4 x i32> @extract_v4i32_nxv2i32_idx4(<vscale x 2 x i32> %vec) nounwind #
 define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
 ; CHECK-LABEL: extract_v8i16_nxv8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 0)
   ret <8 x i16> %retval
@@ -74,6 +81,7 @@ define <8 x i16> @extract_v8i16_nxv8i16_idx8(<vscale x 8 x i16> %vec) nounwind {
 ; CHECK-LABEL: extract_v8i16_nxv8i16_idx8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 8)
   ret <8 x i16> %retval
@@ -84,6 +92,7 @@ define <8 x i16> @extract_v8i16_nxv4i16(<vscale x 4 x i16> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v8i16_nxv4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16> %vec, i64 0)
   ret <8 x i16> %retval
@@ -95,6 +104,7 @@ define <8 x i16> @extract_v8i16_nxv4i16_idx8(<vscale x 4 x i16> %vec) nounwind #
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #32
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv4i16(<vscale x 4 x i16> %vec, i64 8)
   ret <8 x i16> %retval
@@ -106,6 +116,7 @@ define <8 x i16> @extract_v8i16_nxv2i16(<vscale x 2 x i16> %vec) nounwind #1 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16> %vec, i64 0)
   ret <8 x i16> %retval
@@ -118,6 +129,7 @@ define <8 x i16> @extract_v8i16_nxv2i16_idx8(<vscale x 2 x i16> %vec) nounwind #
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #64
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <8 x i16> @llvm.vector.extract.v8i16.nxv2i16(<vscale x 2 x i16> %vec, i64 8)
   ret <8 x i16> %retval
@@ -127,6 +139,7 @@ define <8 x i16> @extract_v8i16_nxv2i16_idx8(<vscale x 2 x i16> %vec) nounwind #
 define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
 ; CHECK-LABEL: extract_v16i8_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 0)
   ret <16 x i8> %retval
@@ -137,6 +150,7 @@ define <16 x i8> @extract_v16i8_nxv16i8_idx16(<vscale x 16 x i8> %vec) nounwind
 ; CHECK-LABEL: extract_v16i8_nxv16i8_idx16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -147,6 +161,7 @@ define <16 x i8> @extract_v16i8_nxv8i8(<vscale x 8 x i8> %vec) nounwind #1 {
 ; CHECK-LABEL: extract_v16i8_nxv8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %vec, i64 0)
   ret <16 x i8> %retval
@@ -158,6 +173,7 @@ define <16 x i8> @extract_v16i8_nxv8i8_idx16(<vscale x 8 x i8> %vec) nounwind #1
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #32
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv8i8(<vscale x 8 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -169,6 +185,7 @@ define <16 x i8> @extract_v16i8_nxv4i8(<vscale x 4 x i8> %vec) nounwind #1 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8> %vec, i64 0)
   ret <16 x i8> %retval
@@ -181,6 +198,7 @@ define <16 x i8> @extract_v16i8_nxv4i8_idx16(<vscale x 4 x i8> %vec) nounwind #1
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #64
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -193,6 +211,7 @@ define <16 x i8> @extract_v16i8_nxv2i8(<vscale x 2 x i8> %vec) nounwind #1 {
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8> %vec, i64 0)
   ret <16 x i8> %retval
@@ -206,6 +225,7 @@ define <16 x i8> @extract_v16i8_nxv2i8_idx16(<vscale x 2 x i8> %vec) nounwind #1
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <16 x i8> @llvm.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8> %vec, i64 16)
   ret <16 x i8> %retval
@@ -222,6 +242,7 @@ define <2 x i1> @extract_v2i1_nxv2i1(<vscale x 2 x i1> %inmask) {
 ; CHECK-NEXT:    mov x8, v0.d[1]
 ; CHECK-NEXT:    fmov s0, w0
 ; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %mask = call <2 x i1> @llvm.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %inmask, i64 0)
   ret <2 x i1> %mask
@@ -238,6 +259,7 @@ define <4 x i1> @extract_v4i1_nxv4i1(<vscale x 4 x i1> %inmask) {
 ; CHECK-NEXT:    mov w8, v1.s[3]
 ; CHECK-NEXT:    mov v0.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %mask = call <4 x i1> @llvm.vector.extract.v4i1.nxv4i1(<vscale x 4 x i1> %inmask, i64 0)
   ret <4 x i1> %mask
@@ -262,6 +284,7 @@ define <8 x i1> @extract_v8i1_nxv8i1(<vscale x 8 x i1> %inmask) {
 ; CHECK-NEXT:    umov w8, v1.h[7]
 ; CHECK-NEXT:    mov v0.b[6], w9
 ; CHECK-NEXT:    mov v0.b[7], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %mask = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> %inmask, i64 0)
   ret <8 x i1> %mask
@@ -301,6 +324,7 @@ define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind
 ; CHECK-LABEL: extract_fixed_v2i64_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %retval = call <2 x i64> @llvm.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
   ret <2 x i64> %retval
@@ -324,6 +348,7 @@ define <4 x i32> @typesize_regression_test_v4i32(ptr %addr, i64 %idx) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x1, lsl #2]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 entry:
   %ptr = getelementptr inbounds i32, ptr %addr, i64 %idx
@@ -340,6 +365,7 @@ entry:
 define <2 x float> @extract_v2f32_nxv4f32_splat(float %f) {
 ; CHECK-LABEL: extract_v2f32_nxv4f32_splat:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 4 x float> poison, float %f, i32 0

diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index 99e833fc17271a..cbede1bf8bb74b 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -1025,6 +1025,7 @@ declare <vscale x 4 x bfloat> @llvm.vector.extract.nxv4bf16.nxv16bf16(<vscale x
 define <vscale x 2 x float> @extract_nxv2f32_nxv4f32_splat(float %f) {
 ; CHECK-LABEL: extract_nxv2f32_nxv4f32_splat:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 4 x float> poison, float %f, i32 0

diff --git a/llvm/test/CodeGen/AArch64/sve-fadda-select.ll b/llvm/test/CodeGen/AArch64/sve-fadda-select.ll
index e2227f5855938c..e7681cae083720 100644
--- a/llvm/test/CodeGen/AArch64/sve-fadda-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fadda-select.ll
@@ -6,7 +6,9 @@
 define float @pred_fadda_nxv2f32(float %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x float> %y, <vscale x 2 x float> splat(float -0.000000e+00)
   %fadda = call float @llvm.vector.reduce.fadd.nxv2f32(float %x, <vscale x 2 x float> %sel)
@@ -16,7 +18,9 @@ define float @pred_fadda_nxv2f32(float %x, <vscale x 2 x float> %y, <vscale x 2
 define float @pred_fadda_nxv4f32(float %x, <vscale x 4 x float> %y, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x float> %y, <vscale x 4 x float> splat(float -0.000000e+00)
   %fadda = call float @llvm.vector.reduce.fadd.nxv4f32(float %x, <vscale x 4 x float> %sel)
@@ -26,7 +30,9 @@ define float @pred_fadda_nxv4f32(float %x, <vscale x 4 x float> %y, <vscale x 4
 define double @pred_fadda_nxv2f64(double %x, <vscale x 2 x double> %y, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x double> %y, <vscale x 2 x double> splat(double -0.000000e+00)
   %fadda = call double @llvm.vector.reduce.fadd.nxv2f64(double %x, <vscale x 2 x double> %sel)
@@ -36,7 +42,9 @@ define double @pred_fadda_nxv2f64(double %x, <vscale x 2 x double> %y, <vscale x
 define half @pred_fadda_nxv2f16(half %x, <vscale x 2 x half> %y, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 2 x i1> %mask, <vscale x 2 x half> %y, <vscale x 2 x half> splat(half -0.000000e+00)
   %fadda = call half @llvm.vector.reduce.fadd.nxv2f16(half %x, <vscale x 2 x half> %sel)
@@ -46,7 +54,9 @@ define half @pred_fadda_nxv2f16(half %x, <vscale x 2 x half> %y, <vscale x 2 x i
 define half @pred_fadda_nxv4f16(half %x, <vscale x 4 x half> %y, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 4 x i1> %mask, <vscale x 4 x half> %y, <vscale x 4 x half> splat(half -0.000000e+00)
   %fadda = call half @llvm.vector.reduce.fadd.nxv4f16(half %x, <vscale x 4 x half> %sel)
@@ -56,7 +66,9 @@ define half @pred_fadda_nxv4f16(half %x, <vscale x 4 x half> %y, <vscale x 4 x i
 define half @pred_fadda_nxv8f16(half %x, <vscale x 8 x half> %y, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: pred_fadda_nxv8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %sel = select <vscale x 8 x i1> %mask, <vscale x 8 x half> %y, <vscale x 8 x half> splat(half -0.000000e+00)
   %fadda = call half @llvm.vector.reduce.fadd.nxv8f16(half %x, <vscale x 8 x half> %sel)

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
index cf42145ea542ec..1e71c4b66156cb 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
@@ -277,7 +277,9 @@ define <1 x i64> @ctlz_v1i64(<1 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ctlz_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op)
   ret <1 x i64> %res
@@ -287,7 +289,9 @@ define <2 x i64> @ctlz_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ctlz_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %op)
   ret <2 x i64> %res
@@ -737,6 +741,7 @@ define <8 x i8> @cttz_v8i8(<8 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT:    clz v0.8b, v0.8b
 ; CHECK-NEXT:    ret
@@ -748,6 +753,7 @@ define <16 x i8> @cttz_v16i8(<16 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT:    clz v0.16b, v0.16b
 ; CHECK-NEXT:    ret
@@ -833,6 +839,7 @@ define <4 x i16> @cttz_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz v0.4h, v0.4h
 ; CHECK-NEXT:    ret
@@ -844,6 +851,7 @@ define <8 x i16> @cttz_v8i16(<8 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz v0.8h, v0.8h
 ; CHECK-NEXT:    ret
@@ -930,6 +938,7 @@ define <2 x i32> @cttz_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz v0.2s, v0.2s
 ; CHECK-NEXT:    ret
@@ -942,6 +951,7 @@ define <4 x i32> @cttz_v4i32(<4 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz v0.4s, v0.4s
 ; CHECK-NEXT:    ret
@@ -1027,8 +1037,10 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op)
   ret <1 x i64> %res
@@ -1038,8 +1050,10 @@ define <2 x i64> @cttz_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %op)
   ret <2 x i64> %res

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
index 92b332f7611fb8..f7751131005e30 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
@@ -23,6 +23,8 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2) vscale_range(2,0) #0
 define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <8 x i8> %op1, <8 x i8> %op2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
@@ -182,6 +184,8 @@ define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) vscale_range(2,0)
 define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x i16> %op1, <4 x i16> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -310,6 +314,8 @@ define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) vscale_range(2,0)
 define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <2 x i32> %op1, <2 x i32> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -413,6 +419,8 @@ define void @concat_v64i32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
 define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <1 x i64> %op1, <1 x i64> %op2, <2 x i32> <i32 0, i32 1>
@@ -519,6 +527,8 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) vscale_range(2
 define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <4 x half> %op1, <4 x half> %op2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -647,6 +657,8 @@ define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) vscale_rang
 define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <2 x float> %op1, <2 x float> %op2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -750,6 +762,8 @@ define void @concat_v64f32(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #0 {
 define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    ret
   %res = shufflevector <1 x double> %op1, <1 x double> %op2, <2 x i32> <i32 0, i32 1>

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
index dddfccaf8acce3..0f8f4a6843eae8 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
@@ -22,6 +22,7 @@ define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <8 x i8> @llvm.vector.extract.v8i8.v16i8(<16 x i8> %op, i64 8)
   ret <8 x i8> %ret
@@ -102,6 +103,7 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <2 x i16> @llvm.vector.extract.v2i16.v4i16(<4 x i16> %op, i64 2)
   ret <2 x i16> %ret
@@ -112,6 +114,7 @@ define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <4 x i16> @llvm.vector.extract.v4i16.v8i16(<8 x i16> %op, i64 4)
   ret <4 x i16> %ret
@@ -190,6 +193,7 @@ define void @extract_subvector_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
   %ret = call <1 x i32> @llvm.vector.extract.v1i32.v2i32(<2 x i32> %op, i64 1)
@@ -201,6 +205,7 @@ define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <2 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %op, i64 2)
   ret <2 x i32> %ret
@@ -280,6 +285,7 @@ define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <1 x i64> @llvm.vector.extract.v1i64.v2i64(<2 x i64> %op, i64 1)
   ret <1 x i64> %ret
@@ -351,6 +357,7 @@ define void @extract_subvector_v32i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
 define <2 x half> @extract_subvector_v4f16(<4 x half> %op) vscale_range(16,0) #0 {
 ; CHECK-LABEL: extract_subvector_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
   %ret = call <2 x half> @llvm.vector.extract.v2f16.v4f16(<4 x half> %op, i64 2)
@@ -362,6 +369,7 @@ define <4 x half> @extract_subvector_v8f16(<8 x half> %op) vscale_range(2,0) #0
 ; CHECK-LABEL: extract_subvector_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <4 x half> @llvm.vector.extract.v4f16.v8f16(<8 x half> %op, i64 4)
   ret <4 x half> %ret
@@ -440,6 +448,7 @@ define void @extract_subvector_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x float> @extract_subvector_v2f32(<2 x float> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[1]
 ; CHECK-NEXT:    ret
   %ret = call <1 x float> @llvm.vector.extract.v1f32.v2f32(<2 x float> %op, i64 1)
@@ -451,6 +460,7 @@ define <2 x float> @extract_subvector_v4f32(<4 x float> %op) vscale_range(2,0) #
 ; CHECK-LABEL: extract_subvector_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> %op, i64 2)
   ret <2 x float> %ret
@@ -530,6 +540,7 @@ define <1 x double> @extract_subvector_v2f64(<2 x double> %op) vscale_range(2,0)
 ; CHECK-LABEL: extract_subvector_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %ret = call <1 x double> @llvm.vector.extract.v1f64.v2f64(<2 x double> %op, i64 1)
   ret <1 x double> %ret

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
index 079ff17d5f0e4c..ad4efeaf39247a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
@@ -13,6 +13,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define half @extractelement_v4f16(<4 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extractelement_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h0, v0.h[3]
 ; CHECK-NEXT:    ret
     %r = extractelement <4 x half> %op1, i64 3
@@ -35,6 +36,7 @@ define half @extractelement_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    mov z0.h, z0.h[15]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
     %op1 = load <16 x half>, ptr %a
     %r = extractelement <16 x half> %op1, i64 15
@@ -48,6 +50,7 @@ define half @extractelement_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT:    mov z0.h, z0.h[15]
+; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: extractelement_v32f16:
@@ -55,6 +58,7 @@ define half @extractelement_v32f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    mov z0.h, z0.h[31]
+; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
     %op1 = load <32 x half>, ptr %a
     %r = extractelement <32 x half> %op1, i64 31
@@ -93,6 +97,7 @@ define half @extractelement_v128f16(ptr %a) vscale_range(16,0) #0 {
 define float @extractelement_v2f32(<2 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extractelement_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    ret
     %r = extractelement <2 x float> %op1, i64 1
@@ -115,6 +120,7 @@ define float @extractelement_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    mov z0.s, z0.s[7]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
     %op1 = load <8 x float>, ptr %a
     %r = extractelement <8 x float> %op1, i64 7
@@ -128,6 +134,7 @@ define float @extractelement_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    mov z0.s, z0.s[7]
+; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: extractelement_v16f32:
@@ -135,6 +142,7 @@ define float @extractelement_v16f32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    mov z0.s, z0.s[15]
+; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
     %op1 = load <16 x float>, ptr %a
     %r = extractelement <16 x float> %op1, i64 15
@@ -194,6 +202,7 @@ define double @extractelement_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    mov z0.d, z0.d[3]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
     %op1 = load <4 x double>, ptr %a
     %r = extractelement <4 x double> %op1, i64 3
@@ -207,6 +216,7 @@ define double @extractelement_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    mov z0.d, z0.d[3]
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: extractelement_v8f64:
@@ -214,6 +224,7 @@ define double @extractelement_v8f64(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    mov z0.d, z0.d[7]
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
     %op1 = load <8 x double>, ptr %a
     %r = extractelement <8 x double> %op1, i64 7

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll
index ccf47cf75d2c8c..b60988be1e76c7 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll
@@ -524,6 +524,7 @@ define void @fcvt_v32f64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvt_v1f64_v1f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-NEXT:    str s0, [x0]
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
index 0b11789b7d549a..8053a401e5f45e 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll
@@ -14,7 +14,10 @@ define half @fadda_v4f16(half %start, <4 x half> %a) vscale_range(1,0) #0 {
 ; CHECK-LABEL: fadda_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
   ret half %res
@@ -25,7 +28,10 @@ define half @fadda_v8f16(half %start, <8 x half> %a) vscale_range(1,0) #0 {
 ; CHECK-LABEL: fadda_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
   ret half %res
@@ -35,8 +41,10 @@ define half @fadda_v16f16(half %start, ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fadda_v16f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl16
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
@@ -47,18 +55,22 @@ define half @fadda_v32f16(half %start, ptr %a) #0 {
 ; VBITS_GE_256-LABEL: fadda_v32f16:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
+; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fadda h0, p0, h0, z1.h
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT:    fadda h0, p0, h0, z1.h
+; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fadda_v32f16:
 ; VBITS_GE_512:       // %bb.0:
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
+; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fadda h0, p0, h0, z1.h
+; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fadd.v32f16(half %start, <32 x half> %op)
@@ -69,8 +81,10 @@ define half @fadda_v64f16(half %start, ptr %a) vscale_range(8,0) #0 {
 ; CHECK-LABEL: fadda_v64f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl64
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fadd.v64f16(half %start, <64 x half> %op)
@@ -81,8 +95,10 @@ define half @fadda_v128f16(half %start, ptr %a) vscale_range(16,0) #0 {
 ; CHECK-LABEL: fadda_v128f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl128
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <128 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fadd.v128f16(half %start, <128 x half> %op)
@@ -94,7 +110,10 @@ define float @fadda_v2f32(float %start, <2 x float> %a) vscale_range(1,0) #0 {
 ; CHECK-LABEL: fadda_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
   ret float %res
@@ -105,7 +124,10 @@ define float @fadda_v4f32(float %start, <4 x float> %a) vscale_range(1,0) #0 {
 ; CHECK-LABEL: fadda_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
   ret float %res
@@ -115,8 +137,10 @@ define float @fadda_v8f32(float %start, ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fadda_v8f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <8 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
@@ -127,18 +151,22 @@ define float @fadda_v16f32(float %start, ptr %a) #0 {
 ; VBITS_GE_256-LABEL: fadda_v16f32:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
+; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fadda s0, p0, s0, z1.s
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    fadda s0, p0, s0, z1.s
+; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fadda_v16f32:
 ; VBITS_GE_512:       // %bb.0:
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
+; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fadda s0, p0, s0, z1.s
+; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fadd.v16f32(float %start, <16 x float> %op)
@@ -149,8 +177,10 @@ define float @fadda_v32f32(float %start, ptr %a) vscale_range(8,0) #0 {
 ; CHECK-LABEL: fadda_v32f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl32
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fadd.v32f32(float %start, <32 x float> %op)
@@ -161,8 +191,10 @@ define float @fadda_v64f32(float %start, ptr %a) vscale_range(16,0) #0 {
 ; CHECK-LABEL: fadda_v64f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl64
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fadd.v64f32(float %start, <64 x float> %op)
@@ -184,7 +216,10 @@ define double @fadda_v2f64(double %start, <2 x double> %a) vscale_range(1,0) #0
 ; CHECK-LABEL: fadda_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
   ret double %res
@@ -194,8 +229,10 @@ define double @fadda_v4f64(double %start, ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fadda_v4f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <4 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
@@ -206,18 +243,22 @@ define double @fadda_v8f64(double %start, ptr %a) #0 {
 ; VBITS_GE_256-LABEL: fadda_v8f64:
 ; VBITS_GE_256:       // %bb.0:
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fadda d0, p0, d0, z1.d
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    fadda d0, p0, d0, z1.d
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fadda_v8f64:
 ; VBITS_GE_512:       // %bb.0:
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fadda d0, p0, d0, z1.d
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <8 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fadd.v8f64(double %start, <8 x double> %op)
@@ -228,8 +269,10 @@ define double @fadda_v16f64(double %start, ptr %a) vscale_range(8,0) #0 {
 ; CHECK-LABEL: fadda_v16f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fadd.v16f64(double %start, <16 x double> %op)
@@ -240,8 +283,10 @@ define double @fadda_v32f64(double %start, ptr %a) vscale_range(16,0) #0 {
 ; CHECK-LABEL: fadda_v32f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fadd.v32f64(double %start, <32 x double> %op)
@@ -257,6 +302,7 @@ define half @faddv_v4f16(half %start, <4 x half> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: faddv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    faddv h1, p0, z1.h
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    ret
@@ -269,6 +315,7 @@ define half @faddv_v8f16(half %start, <8 x half> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: faddv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    faddv h1, p0, z1.h
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    ret
@@ -355,6 +402,7 @@ define float @faddv_v4f32(float %start, <4 x float> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: faddv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    faddv s1, p0, z1.s
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
@@ -539,6 +587,7 @@ define half @fmaxv_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
@@ -554,6 +603,7 @@ define half @fmaxv_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
 ; VBITS_GE_256-NEXT:    fmaxnmv h0, p0, z0.h
+; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaxv_v32f16:
@@ -561,6 +611,7 @@ define half @fmaxv_v32f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxnmv h0, p0, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %op)
@@ -573,6 +624,7 @@ define half @fmaxv_v64f16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmax.v64f16(<64 x half> %op)
@@ -585,6 +637,7 @@ define half @fmaxv_v128f16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <128 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmax.v128f16(<128 x half> %op)
@@ -617,6 +670,7 @@ define float @fmaxv_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <8 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op)
@@ -632,6 +686,7 @@ define float @fmaxv_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    fmaxnmv s0, p0, z0.s
+; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaxv_v16f32:
@@ -639,6 +694,7 @@ define float @fmaxv_v16f32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxnmv s0, p0, z0.s
+; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %op)
@@ -651,6 +707,7 @@ define float @fmaxv_v32f32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> %op)
@@ -663,6 +720,7 @@ define float @fmaxv_v64f32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmax.v64f32(<64 x float> %op)
@@ -694,6 +752,7 @@ define double @fmaxv_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <4 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op)
@@ -709,6 +768,7 @@ define double @fmaxv_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
 ; VBITS_GE_256-NEXT:    fmaxnmv d0, p0, z0.d
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaxv_v8f64:
@@ -716,6 +776,7 @@ define double @fmaxv_v8f64(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxnmv d0, p0, z0.d
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <8 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %op)
@@ -728,6 +789,7 @@ define double @fmaxv_v16f64(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %op)
@@ -740,6 +802,7 @@ define double @fmaxv_v32f64(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl32
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %op)
@@ -776,6 +839,7 @@ define half @fminv_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op)
@@ -791,6 +855,7 @@ define half @fminv_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
 ; VBITS_GE_256-NEXT:    fminnmv h0, p0, z0.h
+; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminv_v32f16:
@@ -798,6 +863,7 @@ define half @fminv_v32f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminnmv h0, p0, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %op)
@@ -810,6 +876,7 @@ define half @fminv_v64f16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmin.v64f16(<64 x half> %op)
@@ -822,6 +889,7 @@ define half @fminv_v128f16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <128 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmin.v128f16(<128 x half> %op)
@@ -854,6 +922,7 @@ define float @fminv_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <8 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op)
@@ -869,6 +938,7 @@ define float @fminv_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    fminnmv s0, p0, z0.s
+; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminv_v16f32:
@@ -876,6 +946,7 @@ define float @fminv_v16f32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminnmv s0, p0, z0.s
+; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %op)
@@ -888,6 +959,7 @@ define float @fminv_v32f32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %op)
@@ -900,6 +972,7 @@ define float @fminv_v64f32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmin.v64f32(<64 x float> %op)
@@ -931,6 +1004,7 @@ define double @fminv_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <4 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op)
@@ -946,6 +1020,7 @@ define double @fminv_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
 ; VBITS_GE_256-NEXT:    fminnmv d0, p0, z0.d
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminv_v8f64:
@@ -953,6 +1028,7 @@ define double @fminv_v8f64(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminnmv d0, p0, z0.d
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <8 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %op)
@@ -965,6 +1041,7 @@ define double @fminv_v16f64(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %op)
@@ -977,6 +1054,7 @@ define double @fminv_v32f64(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl32
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %op)
@@ -1011,6 +1089,7 @@ define half @fmaximumv_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %op)
@@ -1026,6 +1105,7 @@ define half @fmaximumv_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
 ; VBITS_GE_256-NEXT:    fmaxv h0, p0, z0.h
+; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaximumv_v32f16:
@@ -1033,6 +1113,7 @@ define half @fmaximumv_v32f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxv h0, p0, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmaximum.v32f16(<32 x half> %op)
@@ -1045,6 +1126,7 @@ define half @fmaximumv_v64f16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmaximum.v64f16(<64 x half> %op)
@@ -1057,6 +1139,7 @@ define half @fmaximumv_v128f16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <128 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fmaximum.v128f16(<128 x half> %op)
@@ -1089,6 +1172,7 @@ define float @fmaximumv_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <8 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %op)
@@ -1104,6 +1188,7 @@ define float @fmaximumv_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    fmaxv s0, p0, z0.s
+; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaximumv_v16f32:
@@ -1111,6 +1196,7 @@ define float @fmaximumv_v16f32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxv s0, p0, z0.s
+; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmaximum.v16f32(<16 x float> %op)
@@ -1123,6 +1209,7 @@ define float @fmaximumv_v32f32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmaximum.v32f32(<32 x float> %op)
@@ -1135,6 +1222,7 @@ define float @fmaximumv_v64f32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> %op)
@@ -1166,6 +1254,7 @@ define double @fmaximumv_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <4 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %op)
@@ -1181,6 +1270,7 @@ define double @fmaximumv_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
 ; VBITS_GE_256-NEXT:    fmaxv d0, p0, z0.d
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fmaximumv_v8f64:
@@ -1188,6 +1278,7 @@ define double @fmaximumv_v8f64(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fmaxv d0, p0, z0.d
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <8 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmaximum.v8f64(<8 x double> %op)
@@ -1200,6 +1291,7 @@ define double @fmaximumv_v16f64(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmaximum.v16f64(<16 x double> %op)
@@ -1212,6 +1304,7 @@ define double @fmaximumv_v32f64(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl32
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %op)
@@ -1246,6 +1339,7 @@ define half @fminimumv_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %op)
@@ -1261,6 +1355,7 @@ define half @fminimumv_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
 ; VBITS_GE_256-NEXT:    fminv h0, p0, z0.h
+; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminimumv_v32f16:
@@ -1268,6 +1363,7 @@ define half @fminimumv_v32f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminv h0, p0, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fminimum.v32f16(<32 x half> %op)
@@ -1280,6 +1376,7 @@ define half @fminimumv_v64f16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl64
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fminimum.v64f16(<64 x half> %op)
@@ -1292,6 +1389,7 @@ define half @fminimumv_v128f16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl128
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <128 x half>, ptr %a
   %res = call half @llvm.vector.reduce.fminimum.v128f16(<128 x half> %op)
@@ -1324,6 +1422,7 @@ define float @fminimumv_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <8 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %op)
@@ -1339,6 +1438,7 @@ define float @fminimumv_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    fminv s0, p0, z0.s
+; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminimumv_v16f32:
@@ -1346,6 +1446,7 @@ define float @fminimumv_v16f32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminv s0, p0, z0.s
+; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fminimum.v16f32(<16 x float> %op)
@@ -1358,6 +1459,7 @@ define float @fminimumv_v32f32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fminimum.v32f32(<32 x float> %op)
@@ -1370,6 +1472,7 @@ define float @fminimumv_v64f32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl64
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fminv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <64 x float>, ptr %a
   %res = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> %op)
@@ -1401,6 +1504,7 @@ define double @fminimumv_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <4 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %op)
@@ -1416,6 +1520,7 @@ define double @fminimumv_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
 ; VBITS_GE_256-NEXT:    fminv d0, p0, z0.d
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fminimumv_v8f64:
@@ -1423,6 +1528,7 @@ define double @fminimumv_v8f64(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    fminv d0, p0, z0.d
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <8 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fminimum.v8f64(<8 x double> %op)
@@ -1435,6 +1541,7 @@ define double @fminimumv_v16f64(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl16
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <16 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fminimum.v16f64(<16 x double> %op)
@@ -1447,6 +1554,7 @@ define double @fminimumv_v32f64(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl32
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fminv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op = load <32 x double>, ptr %a
   %res = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %op)

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
index 383c697d74ea00..d1e9dc13f50e87 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll
@@ -282,6 +282,7 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
 define void @select_v4f64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
 ; CHECK-LABEL: select_v4f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8
@@ -302,6 +303,7 @@ define void @select_v4f64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
 define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 {
 ; VBITS_GE_256-LABEL: select_v8f64:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; VBITS_GE_256-NEXT:    and x8, x2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.d
 ; VBITS_GE_256-NEXT:    mov z0.d, x8
@@ -320,6 +322,7 @@ define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 {
 ;
 ; VBITS_GE_512-LABEL: select_v8f64:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; VBITS_GE_512-NEXT:    and x8, x2, #0x1
 ; VBITS_GE_512-NEXT:    ptrue p0.d
 ; VBITS_GE_512-NEXT:    mov z0.d, x8
@@ -340,6 +343,7 @@ define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 {
 define void @select_v16f64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
 ; CHECK-LABEL: select_v16f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8
@@ -360,6 +364,7 @@ define void @select_v16f64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
 define void @select_v32f64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
 ; CHECK-LABEL: select_v32f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
index baab397f093573..af54b146c5b663 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
@@ -111,6 +111,7 @@ define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptoui <2 x half> %op1 to <2 x i32>
   ret <2 x i32> %res
@@ -218,10 +219,12 @@ define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
 define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzu_v2f16_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = fptoui <2 x half> %op1 to <2 x i64>
   ret <2 x i64> %res
@@ -329,6 +332,7 @@ define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    mov w8, v1.s[3]
 ; CHECK-NEXT:    mov v0.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptoui <4 x float> %op1 to <4 x i16>
   ret <4 x i16> %res
@@ -341,6 +345,7 @@ define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x float>, ptr %a
   %res = fptoui <8 x float> %op1 to <8 x i16>
@@ -507,6 +512,7 @@ define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtl v0.2d, v0.2s
 ; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptoui <1 x float> %op1 to <1 x i64>
   ret <1 x i64> %res
@@ -605,9 +611,11 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzu_v1f64_v1i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = fptoui <1 x double> %op1 to <1 x i16>
   ret <1 x i16> %res
@@ -632,6 +640,7 @@ define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x double>, ptr %a
   %res = fptoui <4 x double> %op1 to <4 x i16>
@@ -652,6 +661,7 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i16:
@@ -661,6 +671,7 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op1 = load <8 x double>, ptr %a
   %res = fptoui <8 x double> %op1 to <8 x i16>
@@ -703,6 +714,7 @@ define void @fcvtzu_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzu_v1f64_v1i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
 ; CHECK-NEXT:    xtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
@@ -728,6 +740,7 @@ define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x double>, ptr %a
   %res = fptoui <4 x double> %op1 to <4 x i32>
@@ -991,6 +1004,7 @@ define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtl v0.4s, v0.4h
 ; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptosi <2 x half> %op1 to <2 x i32>
   ret <2 x i32> %res
@@ -1098,10 +1112,12 @@ define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) vscale_range(2,0) #0 {
 define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzs_v2f16_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = fptosi <2 x half> %op1 to <2 x i64>
   ret <2 x i64> %res
@@ -1209,6 +1225,7 @@ define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    mov w8, v1.s[3]
 ; CHECK-NEXT:    mov v0.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptosi <4 x float> %op1 to <4 x i16>
   ret <4 x i16> %res
@@ -1221,6 +1238,7 @@ define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x float>, ptr %a
   %res = fptosi <8 x float> %op1 to <8 x i16>
@@ -1387,6 +1405,7 @@ define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtl v0.2d, v0.2s
 ; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = fptosi <1 x float> %op1 to <1 x i64>
   ret <1 x i64> %res
@@ -1485,9 +1504,11 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = fptosi <1 x double> %op1 to <1 x i16>
   ret <1 x i16> %res
@@ -1512,6 +1533,7 @@ define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x double>, ptr %a
   %res = fptosi <4 x double> %op1 to <4 x i16>
@@ -1532,6 +1554,7 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i16:
@@ -1541,6 +1564,7 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op1 = load <8 x double>, ptr %a
   %res = fptosi <8 x double> %op1 to <8 x i16>
@@ -1583,6 +1607,7 @@ define void @fcvtzs_v32f64_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
 ; CHECK-NEXT:    xtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
@@ -1608,6 +1633,7 @@ define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x double>, ptr %a
   %res = fptosi <4 x double> %op1 to <4 x i32>

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
index b34aa0556c23ae..61e04682fa0bfb 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -18,6 +18,7 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #16
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    bl __extenddftf2
 ; CHECK-NEXT:    add x8, sp, #48
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
@@ -28,6 +29,7 @@ define void @fcvt_v4f64_v4f128(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    add x8, sp, #48
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr z0, [x8, #1, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    bl __extenddftf2
 ; CHECK-NEXT:    add x8, sp, #48
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
@@ -68,19 +70,23 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
 ; CHECK-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl __trunctfdf2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #128
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
 ; CHECK-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #128
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
@@ -89,19 +95,23 @@ define void @fcvt_v4f128_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    str z0, [x8, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #64] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #80] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
 ; CHECK-NEXT:    ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #128
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #96] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #112] // 16-byte Folded Reload
 ; CHECK-NEXT:    bl __trunctfdf2
 ; CHECK-NEXT:    ldr q1, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    add x8, sp, #128
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
index d1ddad591e7e43..6f4d257039bca1 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
@@ -14,7 +14,9 @@ define <4 x half> @insertelement_v4f16(<4 x half> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: insertelement_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov h1, #5.00000000
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r = insertelement <4 x half> %op1, half 5.0, i64 3
   ret <4 x half> %r
@@ -133,7 +135,9 @@ define <2 x float> @insertelement_v2f32(<2 x float> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: insertelement_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov s1, #5.00000000
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r = insertelement <2 x float> %op1, float 5.0, i64 1
   ret <2 x float> %r

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
index 7758955bca48d9..58fca3a2cf8b6e 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
@@ -711,7 +711,10 @@ define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
 ; CHECK-LABEL: mul_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = mul <1 x i64> %op1, %op2
   ret <1 x i64> %res
@@ -721,7 +724,10 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
 ; CHECK-LABEL: mul_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = mul <2 x i64> %op1, %op2
   ret <2 x i64> %res

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
index a5d2a09048e354..0ddf434eff930a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
@@ -30,9 +30,11 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: sdiv_v8i8:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $d1 killed $d1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    sunpklo z0.h, z0.b
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
@@ -53,13 +55,16 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    mov v0.b[6], w8
 ; VBITS_GE_256-NEXT:    umov w8, v1.h[7]
 ; VBITS_GE_256-NEXT:    mov v0.b[7], w8
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: sdiv_v8i8:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $d1 killed $d1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    sunpklo z1.h, z1.b
 ; VBITS_GE_512-NEXT:    sunpklo z0.h, z0.b
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
@@ -80,6 +85,7 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    mov v0.b[6], w8
 ; VBITS_GE_512-NEXT:    umov w8, v1.h[7]
 ; VBITS_GE_512-NEXT:    mov v0.b[7], w8
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_512-NEXT:    ret
   %res = sdiv <8 x i8> %op1, %op2
   ret <8 x i8> %res
@@ -112,9 +118,11 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: sdiv_v16i8:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    sunpklo z0.h, z0.b
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z2.s, z1.h
 ; VBITS_GE_256-NEXT:    sunpklo z3.s, z0.h
 ; VBITS_GE_256-NEXT:    ext z1.b, z1.b, z1.b, #16
@@ -128,18 +136,22 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.b, z1.b, z1.b
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: sdiv_v16i8:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    sunpklo z1.h, z1.b
 ; VBITS_GE_512-NEXT:    sunpklo z0.h, z0.b
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_512-NEXT:    uzp1 z0.b, z0.b, z0.b
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = sdiv <16 x i8> %op1, %op2
   ret <16 x i8> %res
@@ -292,6 +304,7 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
 ; VBITS_GE_256-NEXT:    mov w8, v1.s[3]
 ; VBITS_GE_256-NEXT:    mov v0.h[2], w9
 ; VBITS_GE_256-NEXT:    mov v0.h[3], w8
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: sdiv_v4i16:
@@ -307,6 +320,7 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
 ; VBITS_GE_512-NEXT:    mov w8, v1.s[3]
 ; VBITS_GE_512-NEXT:    mov v0.h[2], w9
 ; VBITS_GE_512-NEXT:    mov v0.h[3], w8
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_512-NEXT:    ret
   %res = sdiv <4 x i16> %op1, %op2
   ret <4 x i16> %res
@@ -327,20 +341,26 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: sdiv_v8i16:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: sdiv_v8i16:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = sdiv <8 x i16> %op1, %op2
   ret <8 x i16> %res
@@ -480,7 +500,10 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <2 x i32> %op1, %op2
   ret <2 x i32> %res
@@ -491,7 +514,10 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <4 x i32> %op1, %op2
   ret <4 x i32> %res
@@ -596,7 +622,10 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <1 x i64> %op1, %op2
   ret <1 x i64> %res
@@ -607,7 +636,10 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <2 x i64> %op1, %op2
   ret <2 x i64> %res
@@ -731,9 +763,11 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: udiv_v8i8:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $d1 killed $d1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    uunpklo z0.h, z0.b
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
@@ -754,13 +788,16 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    mov v0.b[6], w8
 ; VBITS_GE_256-NEXT:    umov w8, v1.h[7]
 ; VBITS_GE_256-NEXT:    mov v0.b[7], w8
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: udiv_v8i8:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $d1 killed $d1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    uunpklo z1.h, z1.b
 ; VBITS_GE_512-NEXT:    uunpklo z0.h, z0.b
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
@@ -781,6 +818,7 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    mov v0.b[6], w8
 ; VBITS_GE_512-NEXT:    umov w8, v1.h[7]
 ; VBITS_GE_512-NEXT:    mov v0.b[7], w8
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_512-NEXT:    ret
   %res = udiv <8 x i8> %op1, %op2
   ret <8 x i8> %res
@@ -813,9 +851,11 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: udiv_v16i8:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    uunpklo z0.h, z0.b
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z2.s, z1.h
 ; VBITS_GE_256-NEXT:    uunpklo z3.s, z0.h
 ; VBITS_GE_256-NEXT:    ext z1.b, z1.b, z1.b, #16
@@ -829,18 +869,22 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    splice z1.h, p0, z1.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.b, z1.b, z1.b
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: udiv_v16i8:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    uunpklo z1.h, z1.b
 ; VBITS_GE_512-NEXT:    uunpklo z0.h, z0.b
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
 ; VBITS_GE_512-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_512-NEXT:    uzp1 z0.b, z0.b, z0.b
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = udiv <16 x i8> %op1, %op2
   ret <16 x i8> %res
@@ -980,6 +1024,7 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
 ; VBITS_GE_256-NEXT:    mov w8, v1.s[3]
 ; VBITS_GE_256-NEXT:    mov v0.h[2], w9
 ; VBITS_GE_256-NEXT:    mov v0.h[3], w8
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: udiv_v4i16:
@@ -995,6 +1040,7 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
 ; VBITS_GE_512-NEXT:    mov w8, v1.s[3]
 ; VBITS_GE_512-NEXT:    mov v0.h[2], w9
 ; VBITS_GE_512-NEXT:    mov v0.h[3], w8
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; VBITS_GE_512-NEXT:    ret
   %res = udiv <4 x i16> %op1, %op2
   ret <4 x i16> %res
@@ -1015,20 +1061,26 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: udiv_v8i16:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
+; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: udiv_v8i16:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
+; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
-; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = udiv <8 x i16> %op1, %op2
   ret <8 x i16> %res
@@ -1159,7 +1211,10 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: udiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = udiv <2 x i32> %op1, %op2
   ret <2 x i32> %res
@@ -1170,7 +1225,10 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: udiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = udiv <4 x i32> %op1, %op2
   ret <4 x i32> %res
@@ -1275,7 +1333,10 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: udiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = udiv <1 x i64> %op1, %op2
   ret <1 x i64> %res
@@ -1286,7 +1347,10 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: udiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = udiv <2 x i64> %op1, %op2
   ret <2 x i64> %res

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
index 4ca25ba9bd0206..4feb86305f8f62 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll
@@ -15,8 +15,9 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v8i1_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    lsl z0.s, z0.s, #31
 ; CHECK-NEXT:    asr z0.s, z0.s, #31
@@ -37,8 +38,9 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) vscale_range(2,0) #0 {
 define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v4i3_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    lsl z0.d, z0.d, #61
 ; CHECK-NEXT:    asr z0.d, z0.d, #61
@@ -56,8 +58,9 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) vscale_range(2,0) #0 {
 define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v16i8_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl16
+; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = sext <16 x i8> %a to <16 x i16>
@@ -138,8 +141,9 @@ define void @sext_v128i8_v128i16(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v8i8_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -151,10 +155,11 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) #0 {
 ; VBITS_GE_256-LABEL: sext_v16i8_v16i32:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; VBITS_GE_256-NEXT:    sunpklo z0.h, z0.b
-; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
+; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
+; VBITS_GE_256-NEXT:    sunpklo z0.h, z0.b
 ; VBITS_GE_256-NEXT:    sunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
@@ -164,8 +169,9 @@ define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: sext_v16i8_v16i32:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    sunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
+; VBITS_GE_512-NEXT:    sunpklo z0.h, z0.b
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
@@ -220,8 +226,9 @@ define void @sext_v64i8_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v4i8_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    sxtb z0.d, p0/m, z0.d
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -248,8 +255,9 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: sext_v8i8_v8i64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    sunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
+; VBITS_GE_512-NEXT:    sunpklo z0.h, z0.b
 ; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    sunpklo z0.d, z0.s
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
@@ -262,8 +270,9 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) #0 {
 define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) vscale_range(8,0) #0 {
 ; CHECK-LABEL: sext_v16i8_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sunpklo z0.h, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl16
+; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -299,8 +308,9 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v8i16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = sext <8 x i16> %a to <8 x i32>
@@ -380,8 +390,9 @@ define void @sext_v64i16_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v4i16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -393,10 +404,11 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) #0 {
 ; VBITS_GE_256-LABEL: sext_v8i16_v8i64:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
+; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
+; VBITS_GE_256-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT:    sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT:    sunpklo z1.d, z1.s
@@ -406,8 +418,9 @@ define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: sext_v8i16_v8i64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
+; VBITS_GE_512-NEXT:    sunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    sunpklo z0.d, z0.s
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
@@ -459,8 +472,9 @@ define void @sext_v32i16_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sext_v4i32_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = sext <4 x i32> %a to <4 x i64>
@@ -540,8 +554,9 @@ define void @sext_v32i32_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v16i8_v16i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl16
+; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = zext <16 x i8> %a to <16 x i16>
@@ -622,8 +637,9 @@ define void @zext_v128i8_v128i16(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v8i8_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -635,10 +651,11 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) #0 {
 ; VBITS_GE_256-LABEL: zext_v16i8_v16i32:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; VBITS_GE_256-NEXT:    uunpklo z0.h, z0.b
-; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
+; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
+; VBITS_GE_256-NEXT:    uunpklo z0.h, z0.b
 ; VBITS_GE_256-NEXT:    uunpklo z1.h, z1.b
 ; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
@@ -648,8 +665,9 @@ define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: zext_v16i8_v16i32:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    uunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
+; VBITS_GE_512-NEXT:    uunpklo z0.h, z0.b
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
@@ -704,8 +722,9 @@ define void @zext_v64i8_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v4i8_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -732,8 +751,9 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: zext_v8i8_v8i64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    uunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
+; VBITS_GE_512-NEXT:    uunpklo z0.h, z0.b
 ; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    uunpklo z0.d, z0.s
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
@@ -746,8 +766,9 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) #0 {
 define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) vscale_range(8,0) #0 {
 ; CHECK-LABEL: zext_v16i8_v16i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl16
+; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
@@ -783,8 +804,9 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v8i16_v8i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = zext <8 x i16> %a to <8 x i32>
@@ -864,8 +886,9 @@ define void @zext_v64i16_v64i32(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v4i16_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -877,10 +900,11 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) vscale_range(2,0) #0 {
 define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) #0 {
 ; VBITS_GE_256-LABEL: zext_v8i16_v8i64:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
+; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
+; VBITS_GE_256-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT:    uunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT:    uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT:    uunpklo z1.d, z1.s
@@ -890,8 +914,9 @@ define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) #0 {
 ;
 ; VBITS_GE_512-LABEL: zext_v8i16_v8i64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
+; VBITS_GE_512-NEXT:    uunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT:    uunpklo z0.d, z0.s
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
@@ -943,8 +968,9 @@ define void @zext_v32i16_v32i64(ptr %in, ptr %out) vscale_range(16,0) #0 {
 define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) vscale_range(2,0) #0 {
 ; CHECK-LABEL: zext_v4i32_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   %b = zext <4 x i32> %a to <4 x i64>

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
index 3c1f6cf05a1270..4926684ddc2de7 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll
@@ -311,7 +311,10 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: smax_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
   ret <1 x i64> %res
@@ -322,7 +325,10 @@ define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: smax_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.smax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
   ret <2 x i64> %res
@@ -713,7 +719,10 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: smin_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
   ret <1 x i64> %res
@@ -724,7 +733,10 @@ define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: smin_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.smin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
   ret <2 x i64> %res
@@ -1115,7 +1127,10 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: umax_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
   ret <1 x i64> %res
@@ -1126,7 +1141,10 @@ define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: umax_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.umax.v2i64(<2 x i64> %op1, <2 x i64> %op2)
   ret <2 x i64> %res
@@ -1517,7 +1535,10 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: umin_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
   ret <1 x i64> %res
@@ -1528,7 +1549,10 @@ define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #
 ; CHECK-LABEL: umin_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.umin.v2i64(<2 x i64> %op1, <2 x i64> %op2)
   ret <2 x i64> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
index 5b1f3a76ea8816..331aab17a4cea7 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll
@@ -17,7 +17,10 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: smulh_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %insert = insertelement <8 x i16> undef, i16 8, i64 0
   %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
@@ -34,7 +37,10 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <16 x i8> %op1 to <16 x i16>
   %2 = sext <16 x i8> %op2 to <16 x i16>
@@ -146,7 +152,10 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <4 x i16> %op1 to <4 x i32>
   %2 = sext <4 x i16> %op2 to <4 x i32>
@@ -161,7 +170,10 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <8 x i16> %op1 to <8 x i32>
   %2 = sext <8 x i16> %op2 to <8 x i32>
@@ -271,7 +283,10 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <2 x i32> %op1 to <2 x i64>
   %2 = sext <2 x i32> %op2 to <2 x i64>
@@ -286,7 +301,10 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <4 x i32> %op1 to <4 x i64>
   %2 = sext <4 x i32> %op2 to <4 x i64>
@@ -396,7 +414,10 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %insert = insertelement <1 x i128> undef, i128 64, i128 0
   %splat = shufflevector <1 x i128> %insert, <1 x i128> undef, <1 x i32> zeroinitializer
@@ -413,7 +434,10 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0)
 ; CHECK-LABEL: smulh_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = sext <2 x i64> %op1 to <2 x i128>
   %2 = sext <2 x i64> %op2 to <2 x i128>
@@ -528,7 +552,10 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: umulh_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <8 x i8> %op1 to <8 x i16>
   %2 = zext <8 x i8> %op2 to <8 x i16>
@@ -543,7 +570,10 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <16 x i8> %op1 to <16 x i16>
   %2 = zext <16 x i8> %op2 to <16 x i16>
@@ -656,7 +686,10 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <4 x i16> %op1 to <4 x i32>
   %2 = zext <4 x i16> %op2 to <4 x i32>
@@ -671,7 +704,10 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <8 x i16> %op1 to <8 x i32>
   %2 = zext <8 x i16> %op2 to <8 x i32>
@@ -781,7 +817,10 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <2 x i32> %op1 to <2 x i64>
   %2 = zext <2 x i32> %op2 to <2 x i64>
@@ -796,7 +835,10 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <4 x i32> %op1 to <4 x i64>
   %2 = zext <4 x i32> %op2 to <4 x i64>
@@ -908,7 +950,10 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <1 x i64> %op1 to <1 x i128>
   %2 = zext <1 x i64> %op2 to <1 x i128>
@@ -923,7 +968,10 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0)
 ; CHECK-LABEL: umulh_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %1 = zext <2 x i64> %op1 to <2 x i128>
   %2 = zext <2 x i64> %op2 to <2 x i128>

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
index 0e8479abc8efeb..752c2cd34bfe48 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll
@@ -38,6 +38,7 @@ define i8 @uaddv_v32i8(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <32 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op)
@@ -54,6 +55,7 @@ define i8 @uaddv_v64i8(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    add z0.b, z1.b, z0.b
 ; VBITS_GE_256-NEXT:    uaddv d0, p0, z0.b
 ; VBITS_GE_256-NEXT:    fmov x0, d0
+; VBITS_GE_256-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: uaddv_v64i8:
@@ -62,6 +64,7 @@ define i8 @uaddv_v64i8(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uaddv d0, p0, z0.b
 ; VBITS_GE_512-NEXT:    fmov x0, d0
+; VBITS_GE_512-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <64 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %op)
@@ -75,6 +78,7 @@ define i8 @uaddv_v128i8(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <128 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> %op)
@@ -88,6 +92,7 @@ define i8 @uaddv_v256i8(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <256 x i8>, ptr %a
   %res = call i8 @llvm.vector.reduce.add.v256i8(<256 x i8> %op)
@@ -123,6 +128,7 @@ define i16 @uaddv_v16i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <16 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op)
@@ -139,6 +145,7 @@ define i16 @uaddv_v32i16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    add z0.h, z1.h, z0.h
 ; VBITS_GE_256-NEXT:    uaddv d0, p0, z0.h
 ; VBITS_GE_256-NEXT:    fmov x0, d0
+; VBITS_GE_256-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: uaddv_v32i16:
@@ -147,6 +154,7 @@ define i16 @uaddv_v32i16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uaddv d0, p0, z0.h
 ; VBITS_GE_512-NEXT:    fmov x0, d0
+; VBITS_GE_512-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <32 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %op)
@@ -160,6 +168,7 @@ define i16 @uaddv_v64i16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <64 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v64i16(<64 x i16> %op)
@@ -173,6 +182,7 @@ define i16 @uaddv_v128i16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <128 x i16>, ptr %a
   %res = call i16 @llvm.vector.reduce.add.v128i16(<128 x i16> %op)
@@ -208,6 +218,7 @@ define i32 @uaddv_v8i32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <8 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op)
@@ -224,6 +235,7 @@ define i32 @uaddv_v16i32(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    add z0.s, z1.s, z0.s
 ; VBITS_GE_256-NEXT:    uaddv d0, p0, z0.s
 ; VBITS_GE_256-NEXT:    fmov x0, d0
+; VBITS_GE_256-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: uaddv_v16i32:
@@ -232,6 +244,7 @@ define i32 @uaddv_v16i32(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uaddv d0, p0, z0.s
 ; VBITS_GE_512-NEXT:    fmov x0, d0
+; VBITS_GE_512-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; VBITS_GE_512-NEXT:    ret
   %op = load <16 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %op)
@@ -245,6 +258,7 @@ define i32 @uaddv_v32i32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <32 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %op)
@@ -258,6 +272,7 @@ define i32 @uaddv_v64i32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %op = load <64 x i32>, ptr %a
   %res = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> %op)
@@ -268,6 +283,7 @@ define i32 @uaddv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @uaddv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: uaddv_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
@@ -611,6 +627,7 @@ define i32 @smaxv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @smaxv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: smaxv_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.smax.v1i64(<1 x i64> %a)
@@ -622,6 +639,7 @@ define i64 @smaxv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: smaxv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -955,6 +973,7 @@ define i32 @sminv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @sminv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sminv_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.smin.v1i64(<1 x i64> %a)
@@ -966,6 +985,7 @@ define i64 @sminv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sminv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1299,6 +1319,7 @@ define i32 @umaxv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @umaxv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: umaxv_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a)
@@ -1310,6 +1331,7 @@ define i64 @umaxv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: umaxv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1643,6 +1665,7 @@ define i32 @uminv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @uminv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: uminv_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.umin.v1i64(<1 x i64> %a)
@@ -1654,6 +1677,7 @@ define i64 @uminv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: uminv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
index 5a0ce6ed1d13e0..2d78945399176e 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
@@ -31,6 +31,8 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: srem_v8i8:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $d1 killed $d1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_256-NEXT:    sunpklo z2.h, z1.b
 ; VBITS_GE_256-NEXT:    sunpklo z3.h, z0.b
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
@@ -55,10 +57,13 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    umov w8, v2.h[7]
 ; VBITS_GE_256-NEXT:    mov v3.b[7], w8
 ; VBITS_GE_256-NEXT:    mls v0.8b, v3.8b, v1.8b
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: srem_v8i8:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $d1 killed $d1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_512-NEXT:    sunpklo z2.h, z1.b
 ; VBITS_GE_512-NEXT:    sunpklo z3.h, z0.b
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
@@ -83,6 +88,7 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    umov w8, v2.h[7]
 ; VBITS_GE_512-NEXT:    mov v3.b[7], w8
 ; VBITS_GE_512-NEXT:    mls v0.8b, v3.8b, v1.8b
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = srem <8 x i8> %op1, %op2
   ret <8 x i8> %res
@@ -116,6 +122,8 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: srem_v16i8:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    sunpklo z2.h, z1.b
 ; VBITS_GE_256-NEXT:    sunpklo z3.h, z0.b
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
@@ -133,10 +141,13 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    splice z3.h, p0, z3.h, z2.h
 ; VBITS_GE_256-NEXT:    uzp1 z2.b, z3.b, z3.b
 ; VBITS_GE_256-NEXT:    mls v0.16b, v2.16b, v1.16b
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: srem_v16i8:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    sunpklo z2.h, z1.b
 ; VBITS_GE_512-NEXT:    sunpklo z3.h, z0.b
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
@@ -146,6 +157,7 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_512-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; VBITS_GE_512-NEXT:    mls v0.16b, v2.16b, v1.16b
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = srem <16 x i8> %op1, %op2
   ret <16 x i8> %res
@@ -347,22 +359,28 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: srem_v8i16:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    sunpklo z2.s, z1.h
 ; VBITS_GE_256-NEXT:    sunpklo z3.s, z0.h
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_256-NEXT:    mls v0.8h, v2.8h, v1.8h
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: srem_v8i16:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    sunpklo z2.s, z1.h
 ; VBITS_GE_512-NEXT:    sunpklo z3.s, z0.h
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_512-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_512-NEXT:    mls v0.8h, v2.8h, v1.8h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = srem <8 x i16> %op1, %op2
   ret <8 x i16> %res
@@ -516,9 +534,12 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: srem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls v0.2s, v2.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = srem <2 x i32> %op1, %op2
   ret <2 x i32> %res
@@ -529,9 +550,12 @@ define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: srem_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls v0.4s, v2.4s, v1.4s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = srem <4 x i32> %op1, %op2
   ret <4 x i32> %res
@@ -657,9 +681,12 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: srem_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = srem <1 x i64> %op1, %op2
   ret <1 x i64> %res
@@ -671,9 +698,12 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: srem_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = srem <2 x i64> %op1, %op2
   ret <2 x i64> %res
@@ -819,6 +849,8 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: urem_v8i8:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $d1 killed $d1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_256-NEXT:    uunpklo z2.h, z1.b
 ; VBITS_GE_256-NEXT:    uunpklo z3.h, z0.b
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
@@ -843,10 +875,13 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    umov w8, v2.h[7]
 ; VBITS_GE_256-NEXT:    mov v3.b[7], w8
 ; VBITS_GE_256-NEXT:    mls v0.8b, v3.8b, v1.8b
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: urem_v8i8:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $d1 killed $d1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_512-NEXT:    uunpklo z2.h, z1.b
 ; VBITS_GE_512-NEXT:    uunpklo z3.h, z0.b
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
@@ -871,6 +906,7 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    umov w8, v2.h[7]
 ; VBITS_GE_512-NEXT:    mov v3.b[7], w8
 ; VBITS_GE_512-NEXT:    mls v0.8b, v3.8b, v1.8b
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = urem <8 x i8> %op1, %op2
   ret <8 x i8> %res
@@ -904,6 +940,8 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: urem_v16i8:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    uunpklo z2.h, z1.b
 ; VBITS_GE_256-NEXT:    uunpklo z3.h, z0.b
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
@@ -921,10 +959,13 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_256-NEXT:    splice z3.h, p0, z3.h, z2.h
 ; VBITS_GE_256-NEXT:    uzp1 z2.b, z3.b, z3.b
 ; VBITS_GE_256-NEXT:    mls v0.16b, v2.16b, v1.16b
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: urem_v16i8:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    uunpklo z2.h, z1.b
 ; VBITS_GE_512-NEXT:    uunpklo z3.h, z0.b
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
@@ -934,6 +975,7 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_512-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_512-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; VBITS_GE_512-NEXT:    mls v0.16b, v2.16b, v1.16b
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = urem <16 x i8> %op1, %op2
   ret <16 x i8> %res
@@ -1135,22 +1177,28 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
 ;
 ; VBITS_GE_256-LABEL: urem_v8i16:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_256-NEXT:    uunpklo z2.s, z1.h
 ; VBITS_GE_256-NEXT:    uunpklo z3.s, z0.h
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_256-NEXT:    mls v0.8h, v2.8h, v1.8h
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: urem_v8i16:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $q1 killed $q1 def $z1
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; VBITS_GE_512-NEXT:    uunpklo z2.s, z1.h
 ; VBITS_GE_512-NEXT:    uunpklo z3.s, z0.h
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_512-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; VBITS_GE_512-NEXT:    mls v0.8h, v2.8h, v1.8h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %res = urem <8 x i16> %op1, %op2
   ret <8 x i16> %res
@@ -1304,9 +1352,12 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: urem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls v0.2s, v2.2s, v1.2s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = urem <2 x i32> %op1, %op2
   ret <2 x i32> %res
@@ -1317,9 +1368,12 @@ define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: urem_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls v0.4s, v2.4s, v1.4s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = urem <4 x i32> %op1, %op2
   ret <4 x i32> %res
@@ -1445,9 +1499,12 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: urem_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = urem <1 x i64> %op1, %op2
   ret <1 x i64> %res
@@ -1459,9 +1516,12 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #
 ; CHECK-LABEL: urem_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = urem <2 x i64> %op1, %op2
   ret <2 x i64> %res

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
index c0401bdb153317..37396ba7011be4 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll
@@ -401,6 +401,7 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) vscale_
 define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
 ; CHECK-LABEL: select_v4i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8
@@ -421,6 +422,7 @@ define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
 define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
 ; VBITS_GE_256-LABEL: select_v8i64:
 ; VBITS_GE_256:       // %bb.0:
+; VBITS_GE_256-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; VBITS_GE_256-NEXT:    and x8, x2, #0x1
 ; VBITS_GE_256-NEXT:    ptrue p0.d
 ; VBITS_GE_256-NEXT:    mov z0.d, x8
@@ -439,6 +441,7 @@ define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
 ;
 ; VBITS_GE_512-LABEL: select_v8i64:
 ; VBITS_GE_512:       // %bb.0:
+; VBITS_GE_512-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; VBITS_GE_512-NEXT:    and x8, x2, #0x1
 ; VBITS_GE_512-NEXT:    ptrue p0.d
 ; VBITS_GE_512-NEXT:    mov z0.d, x8
@@ -459,6 +462,7 @@ define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
 define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
 ; CHECK-LABEL: select_v16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8
@@ -479,6 +483,7 @@ define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
 define void @select_v32i64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
 ; CHECK-LABEL: select_v32i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
index 445736587fc3ac..573fe3d8b8a779 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll
@@ -208,10 +208,12 @@ define void @ucvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v1i16_v1f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = uitofp <1 x i16> %op1 to <1 x double>
   ret <1 x double> %res
@@ -314,6 +316,7 @@ define void @ucvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v2i32_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ucvtf v0.4s, v0.4s
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
@@ -339,6 +342,7 @@ define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i32>, ptr %a
   %res = uitofp <8 x i32> %op1 to <8 x half>
@@ -511,6 +515,7 @@ define <1 x double> @ucvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = uitofp <1 x i32> %op1 to <1 x double>
   ret <1 x double> %res
@@ -607,6 +612,7 @@ define void @ucvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x half> @ucvtf_v1i64_v1f16(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v1i64_v1f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    ucvtf h0, x8
 ; CHECK-NEXT:    ret
@@ -619,9 +625,11 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v2i64_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = uitofp <2 x i64> %op1 to <2 x half>
   ret <2 x half> %res
@@ -635,6 +643,7 @@ define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x i64>, ptr %a
   %res = uitofp <4 x i64> %op1 to <4 x half>
@@ -655,6 +664,7 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f16:
@@ -664,6 +674,7 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    ucvtf z0.h, p0/m, z0.d
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op1 = load <8 x i64>, ptr %a
   %res = uitofp <8 x i64> %op1 to <8 x half>
@@ -710,6 +721,7 @@ define void @ucvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x float> @ucvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v1i64_v1f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ucvtf v0.2d, v0.2d
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
@@ -735,6 +747,7 @@ define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x i64>, ptr %a
   %res = uitofp <4 x i64> %op1 to <4 x float>
@@ -813,6 +826,7 @@ define void @ucvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x double> @ucvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ucvtf_v1i64_v1f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    ucvtf d0, d0
 ; CHECK-NEXT:    ret
   %res = uitofp <1 x i64> %op1 to <1 x double>
@@ -1106,10 +1120,12 @@ define void @scvtf_v64i16_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x double> @scvtf_v1i16_v1f64(<1 x i16> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v1i16_v1f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sitofp <1 x i16> %op1 to <1 x double>
   ret <1 x double> %res
@@ -1218,6 +1234,7 @@ define void @scvtf_v32i16_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v2i32_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    scvtf v0.4s, v0.4s
 ; CHECK-NEXT:    fcvtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
@@ -1243,6 +1260,7 @@ define <8 x half> @scvtf_v8i32_v8f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i32>, ptr %a
   %res = sitofp <8 x i32> %op1 to <8 x half>
@@ -1415,6 +1433,7 @@ define <1 x double> @scvtf_v1i32_v1f64(<1 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
 ; CHECK-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %res = sitofp <1 x i32> %op1 to <1 x double>
   ret <1 x double> %res
@@ -1517,6 +1536,7 @@ define void @scvtf_v32i32_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x half> @scvtf_v1i64_v1f16(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v1i64_v1f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    scvtf h0, x8
 ; CHECK-NEXT:    ret
@@ -1529,9 +1549,11 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v2i64_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sitofp <2 x i64> %op1 to <2 x half>
   ret <2 x half> %res
@@ -1545,6 +1567,7 @@ define <4 x half> @scvtf_v4i64_v4f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x i64>, ptr %a
   %res = sitofp <4 x i64> %op1 to <4 x half>
@@ -1565,6 +1588,7 @@ define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: scvtf_v8i64_v8f16:
@@ -1574,6 +1598,7 @@ define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 {
 ; VBITS_GE_512-NEXT:    scvtf z0.h, p0/m, z0.d
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %op1 = load <8 x i64>, ptr %a
   %res = sitofp <8 x i64> %op1 to <8 x half>
@@ -1620,6 +1645,7 @@ define void @scvtf_v32i64_v32f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x float> @scvtf_v1i64_v1f32(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v1i64_v1f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    scvtf v0.2d, v0.2d
 ; CHECK-NEXT:    fcvtn v0.2s, v0.2d
 ; CHECK-NEXT:    ret
@@ -1645,6 +1671,7 @@ define <4 x float> @scvtf_v4i64_v4f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <4 x i64>, ptr %a
   %res = sitofp <4 x i64> %op1 to <4 x float>
@@ -1723,6 +1750,7 @@ define void @scvtf_v32i64_v32f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
 define <1 x double> @scvtf_v1i64_v1f64(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: scvtf_v1i64_v1f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    scvtf d0, d0
 ; CHECK-NEXT:    ret
   %res = sitofp <1 x i64> %op1 to <1 x double>

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll
index 8c1af6149a4265..d0585274a43e3c 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll
@@ -14,6 +14,7 @@ define i8 @andv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -26,6 +27,7 @@ define i8 @andv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -101,6 +103,7 @@ define i16 @andv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -113,6 +116,7 @@ define i16 @andv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -188,6 +192,7 @@ define i32 @andv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -200,6 +205,7 @@ define i32 @andv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -274,6 +280,7 @@ define i32 @andv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @andv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)
@@ -285,6 +292,7 @@ define i64 @andv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: andv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -364,6 +372,7 @@ define i8 @eorv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -376,6 +385,7 @@ define i8 @eorv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -451,6 +461,7 @@ define i16 @eorv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -463,6 +474,7 @@ define i16 @eorv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -538,6 +550,7 @@ define i32 @eorv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -550,6 +563,7 @@ define i32 @eorv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -624,6 +638,7 @@ define i32 @eorv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @eorv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %a)
@@ -635,6 +650,7 @@ define i64 @eorv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: eorv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -714,6 +730,7 @@ define i8 @orv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -726,6 +743,7 @@ define i8 @orv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -801,6 +819,7 @@ define i16 @orv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -813,6 +832,7 @@ define i16 @orv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -888,6 +908,7 @@ define i32 @orv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -900,6 +921,7 @@ define i32 @orv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -974,6 +996,7 @@ define i32 @orv_v64i32(ptr %a) vscale_range(16,0) #0 {
 define i64 @orv_v1i64(<1 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %res = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %a)
@@ -985,6 +1008,7 @@ define i64 @orv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: orv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
index c7e53bd0d0b011..55d37d1bda5e4c 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-128bit-loads.ll
@@ -16,6 +16,7 @@ define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
 ; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <16 x i8> @llvm.masked.load.v16i8(ptr %src, i32 8, <16 x i1> %mask, <16 x i8> zeroinitializer)
   ret <16 x i8> %load
@@ -30,6 +31,7 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
 ; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <8 x half> @llvm.masked.load.v8f16(ptr %src, i32 8, <8 x i1> %mask, <8 x half> zeroinitializer)
   ret <8 x half> %load
@@ -44,6 +46,7 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
 ; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <4 x float> @llvm.masked.load.v4f32(ptr %src, i32 8, <4 x i1> %mask, <4 x float> zeroinitializer)
   ret <4 x float> %load
@@ -58,6 +61,7 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
 ; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> zeroinitializer)
   ret <2 x double> %load
@@ -68,11 +72,13 @@ define <2 x double> @masked_load_passthru_v2f64(ptr %src, <2 x i1> %mask, <2 x d
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    shl v0.2d, v0.2d, #63
 ; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <2 x double> @llvm.masked.load.v2f64(ptr %src, i32 8, <2 x i1> %mask, <2 x double> %passthru)
   ret <2 x double> %load

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
index dcc6c273114e9d..c22d9e71c51a93 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
@@ -23,6 +23,7 @@ define <2 x half> @masked_load_v2f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    mov v0.h[1], w8
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <2 x half>, ptr %ap
   %b = load <2 x half>, ptr %bp
@@ -40,6 +41,7 @@ define <2 x float> @masked_load_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    fcmeq v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <2 x float>, ptr %ap
   %b = load <2 x float>, ptr %bp
@@ -57,6 +59,7 @@ define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(1,0) #0 {
 ; CHECK-NEXT:    fcmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <4 x float>, ptr %ap
   %b = load <4 x float>, ptr %bp

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll
index 5c764d39feb6db..6e8d477fc3ad58 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll
@@ -7,7 +7,10 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
 ; CHECK-LABEL: mul_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = mul <2 x i64> %op1, %op2
   ret <2 x i64> %res
@@ -17,7 +20,10 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <4 x i32> %op1, %op2
   ret <4 x i32> %res

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
index 7c0e576564cdb2..4ba34407ff1846 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
@@ -16,6 +16,7 @@ define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 {
 ; CHECK-NEXT:    mov w8, v1.s[3]
 ; CHECK-NEXT:    mov v0.h[2], w9
 ; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %el0 = extractelement <vscale x 4 x i1> %a, i32 0
   %el1 = extractelement <vscale x 4 x i1> %a, i32 1

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll
index 1d2915d658ad27..82d350f6e28f82 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll
@@ -13,7 +13,9 @@ define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %op)
   ret <8 x i8> %res
@@ -23,7 +25,9 @@ define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> %op)
   ret <16 x i8> %res
@@ -101,7 +105,9 @@ define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %op)
   ret <4 x i16> %res
@@ -111,7 +117,9 @@ define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %op)
   ret <8 x i16> %res
@@ -189,7 +197,9 @@ define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %op)
   ret <2 x i32> %res
@@ -199,7 +209,9 @@ define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %op)
   ret <4 x i32> %res
@@ -277,7 +289,9 @@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op)
   ret <1 x i64> %res
@@ -287,7 +301,9 @@ define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: bitreverse_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %op)
   ret <2 x i64> %res

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index e903280404a3fb..21a5abdeaa4d53 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -9,8 +9,10 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #1
 ; CHECK-NEXT:    subr z0.b, z0.b, #0 // =0x0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <8 x i8> %op1, shufflevector (<8 x i8> insertelement (<8 x i8> poison, i8 -2, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer)
   ret <8 x i8> %res
@@ -20,7 +22,9 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <16 x i8> %op1, shufflevector (<16 x i8> insertelement (<16 x i8> poison, i8 32, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer)
   ret <16 x i8> %res
@@ -99,7 +103,9 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <4 x i16> %op1, shufflevector (<4 x i16> insertelement (<4 x i16> poison, i16 32, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer)
   ret <4 x i16> %res
@@ -109,8 +115,10 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #3
 ; CHECK-NEXT:    subr z0.h, z0.h, #0 // =0x0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <8 x i16> %op1, shufflevector (<8 x i16> insertelement (<8 x i16> poison, i16 -8, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer)
   ret <8 x i16> %res
@@ -189,8 +197,10 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
 ; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <2 x i32> %op1, shufflevector (<2 x i32> insertelement (<2 x i32> poison, i32 -32, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)
   ret <2 x i32> %res
@@ -200,7 +210,9 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <4 x i32> %op1, shufflevector (<4 x i32> insertelement (<4 x i32> poison, i32 32, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)
   ret <4 x i32> %res
@@ -279,8 +291,10 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #7
 ; CHECK-NEXT:    subr z0.d, z0.d, #0 // =0x0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 -128, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)
   ret <1 x i64> %res
@@ -291,7 +305,9 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %res = sdiv <2 x i64> %op1, shufflevector (<2 x i64> insertelement (<2 x i64> poison, i64 32, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)
   ret <2 x i64> %res

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index 17525f4ccc476e..749a1866e7192a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -10,9 +10,10 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) vscale_range(2,2) #0 {
 ; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    mov z1.d, z0.d
-; CHECK-NEXT:    ext z1.b, z1.b, z0.b, #16
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #16
 ; CHECK-NEXT:    st2 { v0.4s, v1.4s }, [x0]
 ; CHECK-NEXT:    ret
   %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll
index 5b194961cbbe84..b633057be139ce 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll
@@ -349,6 +349,7 @@ define void @splat_v32i64(i64 %a, ptr %b) vscale_range(16,0) #0 {
 define <4 x half> @splat_v4f16(half %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.4h, v0.h[0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <4 x half> undef, half %a, i64 0
@@ -360,6 +361,7 @@ define <4 x half> @splat_v4f16(half %a) vscale_range(2,0) #0 {
 define <8 x half> @splat_v8f16(half %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
 ; CHECK-NEXT:    dup v0.8h, v0.h[0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <8 x half> undef, half %a, i64 0
@@ -370,8 +372,9 @@ define <8 x half> @splat_v8f16(half %a) vscale_range(2,0) #0 {
 define void @splat_v16f16(half %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v16f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl16
+; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <16 x half> undef, half %a, i64 0
@@ -383,17 +386,19 @@ define void @splat_v16f16(half %a, ptr %b) vscale_range(2,0) #0 {
 define void @splat_v32f16(half %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: splat_v32f16:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    mov z0.h, h0
+; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
 ; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
+; VBITS_GE_256-NEXT:    mov z0.h, h0
 ; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: splat_v32f16:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    mov z0.h, h0
+; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
+; VBITS_GE_512-NEXT:    mov z0.h, h0
 ; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
   %insert = insertelement <32 x half> undef, half %a, i64 0
@@ -405,8 +410,9 @@ define void @splat_v32f16(half %a, ptr %b) #0 {
 define void @splat_v64f16(half %a, ptr %b) vscale_range(8,0) #0 {
 ; CHECK-LABEL: splat_v64f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl64
+; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <64 x half> undef, half %a, i64 0
@@ -418,8 +424,9 @@ define void @splat_v64f16(half %a, ptr %b) vscale_range(8,0) #0 {
 define void @splat_v128f16(half %a, ptr %b) vscale_range(16,0) #0 {
 ; CHECK-LABEL: splat_v128f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl128
+; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <128 x half> undef, half %a, i64 0
@@ -432,6 +439,7 @@ define void @splat_v128f16(half %a, ptr %b) vscale_range(16,0) #0 {
 define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.2s, v0.s[0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <2 x float> undef, float %a, i64 0
@@ -443,6 +451,7 @@ define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) vscale_range(2,0) #0
 define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    dup v0.4s, v0.s[0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <4 x float> undef, float %a, i64 0
@@ -453,8 +462,9 @@ define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) vscale_range(2,0) #0
 define void @splat_v8f32(float %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v8f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl8
+; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <8 x float> undef, float %a, i64 0
@@ -466,17 +476,19 @@ define void @splat_v8f32(float %a, ptr %b) vscale_range(2,0) #0 {
 define void @splat_v16f32(float %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: splat_v16f32:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    mov z0.s, s0
+; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
+; VBITS_GE_256-NEXT:    mov z0.s, s0
 ; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: splat_v16f32:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    mov z0.s, s0
+; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
+; VBITS_GE_512-NEXT:    mov z0.s, s0
 ; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
   %insert = insertelement <16 x float> undef, float %a, i64 0
@@ -488,8 +500,9 @@ define void @splat_v16f32(float %a, ptr %b) #0 {
 define void @splat_v32f32(float %a, ptr %b) vscale_range(8,0) #0 {
 ; CHECK-LABEL: splat_v32f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl32
+; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <32 x float> undef, float %a, i64 0
@@ -501,8 +514,9 @@ define void @splat_v32f32(float %a, ptr %b) vscale_range(8,0) #0 {
 define void @splat_v64f32(float %a, ptr %b) vscale_range(16,0) #0 {
 ; CHECK-LABEL: splat_v64f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl64
+; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <64 x float> undef, float %a, i64 0
@@ -525,6 +539,7 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) vscale_range(2,0)
 define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    dup v0.2d, v0.d[0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <2 x double> undef, double %a, i64 0
@@ -535,8 +550,9 @@ define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) vscale_range(2,0)
 define void @splat_v4f64(double %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: splat_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl4
+; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <4 x double> undef, double %a, i64 0
@@ -548,17 +564,19 @@ define void @splat_v4f64(double %a, ptr %b) vscale_range(2,0) #0 {
 define void @splat_v8f64(double %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: splat_v8f64:
 ; VBITS_GE_256:       // %bb.0:
-; VBITS_GE_256-NEXT:    mov z0.d, d0
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
+; VBITS_GE_256-NEXT:    mov z0.d, d0
 ; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0]
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: splat_v8f64:
 ; VBITS_GE_512:       // %bb.0:
-; VBITS_GE_512-NEXT:    mov z0.d, d0
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
+; VBITS_GE_512-NEXT:    mov z0.d, d0
 ; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
 ; VBITS_GE_512-NEXT:    ret
   %insert = insertelement <8 x double> undef, double %a, i64 0
@@ -570,8 +588,9 @@ define void @splat_v8f64(double %a, ptr %b) #0 {
 define void @splat_v16f64(double %a, ptr %b) vscale_range(8,0) #0 {
 ; CHECK-LABEL: splat_v16f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl16
+; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <16 x double> undef, double %a, i64 0
@@ -583,8 +602,9 @@ define void @splat_v16f64(double %a, ptr %b) vscale_range(8,0) #0 {
 define void @splat_v32f64(double %a, ptr %b) vscale_range(16,0) #0 {
 ; CHECK-LABEL: splat_v32f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl32
+; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   %insert = insertelement <32 x double> undef, double %a, i64 0

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
index 8f65308ecbe86f..aef19d23109b4f 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
@@ -404,6 +404,7 @@ define <8 x i1> @no_warn_dropped_scalable(ptr %in) #0 {
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %in
   br label %bb1

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll
index 7e9cdbfe64a6ea..8dc45eadce6f3b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll
@@ -15,6 +15,7 @@ define <16 x i8> @trunc_v16i16_v16i8(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.h, vl16
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <16 x i16>, ptr %in
   %b = trunc <16 x i16> %a to <16 x i8>
@@ -101,6 +102,7 @@ define <8 x i8> @trunc_v8i32_v8i8(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %in
   %b = trunc <8 x i32> %a to <8 x i8>
@@ -119,6 +121,7 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.b, z0.b, z0.b
 ; VBITS_GE_256-NEXT:    uzp1 z0.b, z1.b, z1.b
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: trunc_v16i32_v16i8:
@@ -127,6 +130,7 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) #0 {
 ; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_512-NEXT:    uzp1 z0.b, z0.b, z0.b
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %a = load <16 x i32>, ptr %in
   %b = trunc <16 x i32> %a to <16 x i8>
@@ -181,6 +185,7 @@ define <8 x i16> @trunc_v8i32_v8i16(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.s, vl8
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <8 x i32>, ptr %in
   %b = trunc <8 x i32> %a to <8 x i16>
@@ -268,6 +273,7 @@ define <4 x i8> @trunc_v4i64_v4i8(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %in
   %b = trunc <4 x i64> %a to <4 x i8>
@@ -287,6 +293,7 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) #0 {
 ; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.b, z0.b, z0.b
+; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: trunc_v8i64_v8i8:
@@ -296,6 +303,7 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) #0 {
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_512-NEXT:    uzp1 z0.b, z0.b, z0.b
+; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %a = load <8 x i64>, ptr %in
   %b = trunc <8 x i64> %a to <8 x i8>
@@ -310,6 +318,7 @@ define <16 x i8> @trunc_v16i64_v16i8(ptr %in) vscale_range(8,0) #0 {
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <16 x i64>, ptr %in
   %b = trunc <16 x i64> %a to <16 x i8>
@@ -347,6 +356,7 @@ define <4 x i16> @trunc_v4i64_v4i16(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %in
   %b = trunc <4 x i64> %a to <4 x i16>
@@ -365,6 +375,7 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) #0 {
 ; VBITS_GE_256-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT:    mov v0.d[1], v2.d[0]
+; VBITS_GE_256-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT:    ret
 ;
 ; VBITS_GE_512-LABEL: trunc_v8i64_v8i16:
@@ -373,6 +384,7 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) #0 {
 ; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_512-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_512-NEXT:    ret
   %a = load <8 x i64>, ptr %in
   %b = trunc <8 x i64> %a to <8 x i16>
@@ -427,6 +439,7 @@ define <4 x i32> @trunc_v4i64_v4i32(ptr %in) vscale_range(2,0) #0 {
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %a = load <4 x i64>, ptr %in
   %b = trunc <4 x i64> %a to <4 x i32>

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
index 0c32a6212922c4..20659cde83ee00 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
@@ -24,6 +24,7 @@ define <8 x i8> @shuffle_index_indices_from_op1(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr d0, [x0]
 ; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
 ; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
+; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_op1:
@@ -32,6 +33,7 @@ define <8 x i8> @shuffle_index_indices_from_op1(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    ldr d0, [x0]
 ; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
 ; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
+; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_op1:
@@ -40,6 +42,7 @@ define <8 x i8> @shuffle_index_indices_from_op1(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x0]
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
 ; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
+; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_op1:
@@ -50,6 +53,7 @@ define <8 x i8> @shuffle_index_indices_from_op1(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x0]
 ; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
 ; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
+; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -75,6 +79,7 @@ define <8 x i8> @shuffle_index_indices_from_op2(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr d0, [x1]
 ; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
 ; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
+; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_op2:
@@ -83,6 +88,7 @@ define <8 x i8> @shuffle_index_indices_from_op2(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
 ; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
 ; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
+; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_op2:
@@ -91,6 +97,7 @@ define <8 x i8> @shuffle_index_indices_from_op2(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
 ; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
+; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_op2:
@@ -101,6 +108,7 @@ define <8 x i8> @shuffle_index_indices_from_op2(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x1]
 ; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
 ; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
+; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -127,6 +135,7 @@ define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr d1, [x1]
 ; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI2_0]
 ; SVE2_128-NEXT:    tbl z0.b, { z0.b, z1.b }, z2.b
+; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
@@ -147,6 +156,7 @@ define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_128_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
 ; SVE2_128_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
+; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
@@ -167,6 +177,7 @@ define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
+; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
@@ -187,6 +198,7 @@ define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
+; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -213,6 +225,7 @@ define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr d1, [x1]
 ; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
 ; SVE2_128-NEXT:    tbl z0.b, { z0.b, z1.b }, z2.b
+; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_index_poison_value:
@@ -231,6 +244,7 @@ define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_128_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
 ; SVE2_128_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
+; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_poison_value:
@@ -249,6 +263,7 @@ define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
 ; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
+; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_poison_value:
@@ -267,6 +282,7 @@ define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
 ; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
+; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -281,6 +297,7 @@ define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr d0, [x1]
 ; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
 ; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
+; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_op1_poison:
@@ -289,6 +306,7 @@ define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
 ; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
 ; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
+; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_op1_poison:
@@ -297,6 +315,7 @@ define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
 ; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
+; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_op1_poison:
@@ -307,6 +326,7 @@ define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x1]
 ; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
 ; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
+; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op2 = load <8 x i8>, ptr %b
   %1 = shufflevector <8 x i8> poison, <8 x i8> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 15>
@@ -334,6 +354,7 @@ define <8 x i8> @negative_test_shuffle_index_size_op_both_maxhw(ptr %a, ptr %b)
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -361,6 +382,7 @@ define <8 x i8> @shuffle_index_size_op1_maxhw(ptr %a, ptr %b) "target-features"=
 ; CHECK-NEXT:    ldr d1, [x0]
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8]
 ; CHECK-NEXT:    tbl z0.b, { z1.b }, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %op1 = load <8 x i8>, ptr %a
   %op2 = load <8 x i8>, ptr %b
@@ -458,6 +480,7 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
 ; SVE2_128-NEXT:    ldr q1, [x1]
 ; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
 ; SVE2_128-NEXT:    tbl z0.h, { z0.h, z1.h }, z2.h
+; SVE2_128-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2_128-NEXT:    ret
 ;
 ; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
@@ -473,6 +496,7 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
 ; SVE2_128_NOMAX-NEXT:    ldr q1, [x0]
 ; SVE2_128_NOMAX-NEXT:    ldr q2, [x1]
 ; SVE2_128_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
+; SVE2_128_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2_128_NOMAX-NEXT:    ret
 ;
 ; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
@@ -488,6 +512,7 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x0]
 ; SVE2_NOMIN_NOMAX-NEXT:    ldr q2, [x1]
 ; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
+; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2_NOMIN_NOMAX-NEXT:    ret
 ;
 ; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
@@ -505,6 +530,7 @@ define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
 ; SVE2_MIN_256_NOMAX-NEXT:    ldr q1, [x0]
 ; SVE2_MIN_256_NOMAX-NEXT:    ldr q2, [x1]
 ; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
+; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2_MIN_256_NOMAX-NEXT:    ret
   %op1 = load <8 x i16>, ptr %a
   %op2 = load <8 x i16>, ptr %b

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
index 6a66a841570d3a..c77861509e4a1d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-llrint.ll
@@ -16,6 +16,7 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
 define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 ; CHECK-LABEL: llrint_v1i64_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h1, v0.h[1]
 ; CHECK-NEXT:    frintx h0, h0
 ; CHECK-NEXT:    frintx h1, h1
@@ -280,6 +281,7 @@ declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
 define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 ; CHECK-LABEL: llrint_v1i64_v1f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    frintx s0, s0
 ; CHECK-NEXT:    fcvtzs x8, s0
 ; CHECK-NEXT:    fmov d0, x8
@@ -569,6 +571,8 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: llrint_v4i64_v4f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
@@ -593,6 +597,10 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
 ; CHECK-LABEL: llrint_v8i64_v8f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-NEXT:    ptrue p0.d, vl4
@@ -631,6 +639,14 @@ define <16 x i64> @llrint_v16f64(<16 x double> %x) {
 ; CHECK-LABEL: llrint_v16f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.d, vl2
+; CHECK-NEXT:    // kill: def $q6 killed $q6 def $z6
+; CHECK-NEXT:    // kill: def $q4 killed $q4 def $z4
+; CHECK-NEXT:    // kill: def $q7 killed $q7 def $z7
+; CHECK-NEXT:    // kill: def $q5 killed $q5 def $z5
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    splice z6.d, p1, z6.d, z7.d
 ; CHECK-NEXT:    splice z4.d, p1, z4.d, z5.d
@@ -703,6 +719,14 @@ define <32 x i64> @llrint_v32f64(<32 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset w30, -8
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p1.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q7 killed $q7 def $z7
+; CHECK-NEXT:    // kill: def $q6 killed $q6 def $z6
+; CHECK-NEXT:    // kill: def $q4 killed $q4 def $z4
+; CHECK-NEXT:    // kill: def $q5 killed $q5 def $z5
 ; CHECK-NEXT:    ptrue p0.d, vl4
 ; CHECK-NEXT:    splice z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT:    splice z2.d, p1, z2.d, z3.d

diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
index da21b44014b534..6a97e7ad64bf3d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-vector-lrint.ll
@@ -26,6 +26,7 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
 define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; CHECK-i32-LABEL: lrint_v2f16:
 ; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i32-NEXT:    mov h1, v0.h[1]
 ; CHECK-i32-NEXT:    frintx h0, h0
 ; CHECK-i32-NEXT:    frintx h1, h1
@@ -33,10 +34,12 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; CHECK-i32-NEXT:    fcvtzs w9, h1
 ; CHECK-i32-NEXT:    fmov s0, w8
 ; CHECK-i32-NEXT:    mov v0.s[1], w9
+; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v2f16:
 ; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i64-NEXT:    mov h1, v0.h[1]
 ; CHECK-i64-NEXT:    frintx h0, h0
 ; CHECK-i64-NEXT:    frintx h1, h1
@@ -511,6 +514,7 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ;
 ; CHECK-i64-LABEL: lrint_v1f32:
 ; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i64-NEXT:    frintx s0, s0
 ; CHECK-i64-NEXT:    fcvtzs x8, s0
 ; CHECK-i64-NEXT:    fmov d0, x8
@@ -569,6 +573,8 @@ define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
 ; CHECK-i32-LABEL: lrint_v8f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
+; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i32-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i32-NEXT:    ptrue p0.s, vl8
 ; CHECK-i32-NEXT:    movprfx z2, z0
@@ -634,6 +640,10 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) {
 ; CHECK-i32-LABEL: lrint_v16f32:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
+; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i32-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-i32-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i32-NEXT:    ptrue p0.s, vl8
@@ -763,6 +773,14 @@ define <32 x iXLen> @lrint_v32f32(<32 x float> %x) {
 ; CHECK-i32-NEXT:    .cfi_offset w26, -64
 ; CHECK-i32-NEXT:    .cfi_offset w27, -80
 ; CHECK-i32-NEXT:    ptrue p1.d, vl2
+; CHECK-i32-NEXT:    // kill: def $q6 killed $q6 def $z6
+; CHECK-i32-NEXT:    // kill: def $q7 killed $q7 def $z7
+; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-i32-NEXT:    // kill: def $q4 killed $q4 def $z4
+; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-i32-NEXT:    // kill: def $q5 killed $q5 def $z5
+; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-i32-NEXT:    ptrue p0.s, vl8
 ; CHECK-i32-NEXT:    splice z6.d, p1, z6.d, z7.d
 ; CHECK-i32-NEXT:    splice z2.d, p1, z2.d, z3.d
@@ -1030,6 +1048,7 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 ; CHECK-i32-NEXT:    fcvtzs w9, d1
 ; CHECK-i32-NEXT:    fmov s0, w8
 ; CHECK-i32-NEXT:    mov v0.s[1], w9
+; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v2f64:
@@ -1046,6 +1065,8 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; CHECK-i32-LABEL: lrint_v4f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
+; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i32-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i32-NEXT:    ptrue p0.d, vl4
 ; CHECK-i32-NEXT:    movprfx z1, z0
@@ -1066,6 +1087,8 @@ define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
 ; CHECK-i64-LABEL: lrint_v4f64:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
 ; CHECK-i64-NEXT:    frintx z0.d, p0/m, z0.d
@@ -1090,6 +1113,10 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; CHECK-i32-LABEL: lrint_v8f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p0.d, vl2
+; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i32-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i32-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-i32-NEXT:    ptrue p0.d, vl4
@@ -1123,6 +1150,10 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
 ; CHECK-i64-LABEL: lrint_v8f64:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    ptrue p0.d, vl2
+; CHECK-i64-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i64-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i64-NEXT:    splice z0.d, p0, z0.d, z1.d
 ; CHECK-i64-NEXT:    splice z2.d, p0, z2.d, z3.d
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
@@ -1161,6 +1192,14 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
 ; CHECK-i32-LABEL: lrint_v16f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p1.d, vl2
+; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i32-NEXT:    // kill: def $q6 killed $q6 def $z6
+; CHECK-i32-NEXT:    // kill: def $q4 killed $q4 def $z4
+; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-i32-NEXT:    // kill: def $q7 killed $q7 def $z7
+; CHECK-i32-NEXT:    // kill: def $q5 killed $q5 def $z5
+; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
 ; CHECK-i32-NEXT:    ptrue p0.d, vl4
 ; CHECK-i32-NEXT:    splice z0.d, p1, z0.d, z1.d
 ; CHECK-i32-NEXT:    splice z2.d, p1, z2.d, z3.d
@@ -1221,6 +1260,14 @@ define <16 x iXLen> @lrint_v16f64(<16 x double> %x) {
 ; CHECK-i64-LABEL: lrint_v16f64:
 ; CHECK-i64:       // %bb.0:
 ; CHECK-i64-NEXT:    ptrue p1.d, vl2
+; CHECK-i64-NEXT:    // kill: def $q6 killed $q6 def $z6
+; CHECK-i64-NEXT:    // kill: def $q4 killed $q4 def $z4
+; CHECK-i64-NEXT:    // kill: def $q7 killed $q7 def $z7
+; CHECK-i64-NEXT:    // kill: def $q5 killed $q5 def $z5
+; CHECK-i64-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i64-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
 ; CHECK-i64-NEXT:    splice z6.d, p1, z6.d, z7.d
 ; CHECK-i64-NEXT:    splice z4.d, p1, z4.d, z5.d
@@ -1286,6 +1333,14 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
 ; CHECK-i32-LABEL: lrint_v32f64:
 ; CHECK-i32:       // %bb.0:
 ; CHECK-i32-NEXT:    ptrue p1.d, vl2
+; CHECK-i32-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i32-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-i32-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-i32-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-i32-NEXT:    // kill: def $q4 killed $q4 def $z4
+; CHECK-i32-NEXT:    // kill: def $q5 killed $q5 def $z5
+; CHECK-i32-NEXT:    // kill: def $q7 killed $q7 def $z7
+; CHECK-i32-NEXT:    // kill: def $q6 killed $q6 def $z6
 ; CHECK-i32-NEXT:    ptrue p0.d, vl4
 ; CHECK-i32-NEXT:    splice z0.d, p1, z0.d, z1.d
 ; CHECK-i32-NEXT:    splice z2.d, p1, z2.d, z3.d
@@ -1414,6 +1469,14 @@ define <32 x iXLen> @lrint_v32f64(<32 x double> %x) {
 ; CHECK-i64-NEXT:    .cfi_offset w30, -8
 ; CHECK-i64-NEXT:    .cfi_offset w29, -16
 ; CHECK-i64-NEXT:    ptrue p1.d, vl2
+; CHECK-i64-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-i64-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-i64-NEXT:    // kill: def $q3 killed $q3 def $z3
+; CHECK-i64-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-i64-NEXT:    // kill: def $q7 killed $q7 def $z7
+; CHECK-i64-NEXT:    // kill: def $q6 killed $q6 def $z6
+; CHECK-i64-NEXT:    // kill: def $q4 killed $q4 def $z4
+; CHECK-i64-NEXT:    // kill: def $q5 killed $q5 def $z5
 ; CHECK-i64-NEXT:    ptrue p0.d, vl4
 ; CHECK-i64-NEXT:    splice z0.d, p1, z0.d, z1.d
 ; CHECK-i64-NEXT:    splice z2.d, p1, z2.d, z3.d

diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
index 09e22b72d0ef07..b0b6a6a530ddab 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll
@@ -12,7 +12,9 @@ define half @fadda_nxv2f16(half %init, <vscale x 2 x half> %a) {
 ; CHECK-LABEL: fadda_nxv2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fadd.nxv2f16(half %init, <vscale x 2 x half> %a)
   ret half %res
@@ -22,7 +24,9 @@ define half @fadda_nxv4f16(half %init, <vscale x 4 x half> %a) {
 ; CHECK-LABEL: fadda_nxv4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fadd.nxv4f16(half %init, <vscale x 4 x half> %a)
   ret half %res
@@ -32,7 +36,9 @@ define half @fadda_nxv8f16(half %init, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fadda_nxv8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fadd.nxv8f16(half %init, <vscale x 8 x half> %a)
   ret half %res
@@ -54,6 +60,7 @@ define half @fadda_nxv6f16(<vscale x 6 x half> %v, half %s) {
 ; CHECK-NEXT:    st1h { z2.d }, p1, [sp, #3, mul vl]
 ; CHECK-NEXT:    ld1h { z2.h }, p0/z, [sp]
 ; CHECK-NEXT:    fadda h0, p0, h0, z2.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -69,6 +76,7 @@ define half @fadda_nxv10f16(<vscale x 10 x half> %v, half %s) {
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
 ; CHECK-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
@@ -96,6 +104,7 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) {
 ; CHECK-LABEL: fadda_nxv12f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
 ; CHECK-NEXT:    mov w8, #32768 // =0x8000
 ; CHECK-NEXT:    fadda h2, p0, h2, z0.h
 ; CHECK-NEXT:    uunpklo z0.s, z1.h
@@ -112,7 +121,9 @@ define float @fadda_nxv2f32(float %init, <vscale x 2 x float> %a) {
 ; CHECK-LABEL: fadda_nxv2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fadd.nxv2f32(float %init, <vscale x 2 x float> %a)
   ret float %res
@@ -122,7 +133,9 @@ define float @fadda_nxv4f32(float %init, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fadda_nxv4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fadd.nxv4f32(float %init, <vscale x 4 x float> %a)
   ret float %res
@@ -132,7 +145,9 @@ define double @fadda_nxv2f64(double %init, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fadda_nxv2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fadd.nxv2f64(double %init, <vscale x 2 x double> %a)
   ret double %res

diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
index d598f46db0d3c8..15ee6a02a16393 100644
--- a/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce.ll
@@ -76,6 +76,7 @@ define half @fmaxv_nxv2f16(<vscale x 2 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmax.nxv2f16(<vscale x 2 x half> %a)
   ret half %res
@@ -86,6 +87,7 @@ define half @fmaxv_nxv4f16(<vscale x 4 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %a)
   ret half %res
@@ -96,6 +98,7 @@ define half @fmaxv_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmax.nxv8f16(<vscale x 8 x half> %a)
   ret half %res
@@ -106,6 +109,7 @@ define float @fmaxv_nxv2f32(<vscale x 2 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmax.nxv2f32(<vscale x 2 x float> %a)
   ret float %res
@@ -116,6 +120,7 @@ define float @fmaxv_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %a)
   ret float %res
@@ -126,6 +131,7 @@ define double @fmaxv_nxv2f64(<vscale x 2 x double> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fmax.nxv2f64(<vscale x 2 x double> %a)
   ret double %res
@@ -138,6 +144,7 @@ define half @fminv_nxv2f16(<vscale x 2 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmin.nxv2f16(<vscale x 2 x half> %a)
   ret half %res
@@ -148,6 +155,7 @@ define half @fminv_nxv4f16(<vscale x 4 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %a)
   ret half %res
@@ -158,6 +166,7 @@ define half @fminv_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> %a)
   ret half %res
@@ -168,6 +177,7 @@ define float @fminv_nxv2f32(<vscale x 2 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmin.nxv2f32(<vscale x 2 x float> %a)
   ret float %res
@@ -178,6 +188,7 @@ define float @fminv_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %a)
   ret float %res
@@ -188,6 +199,7 @@ define double @fminv_nxv2f64(<vscale x 2 x double> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fmin.nxv2f64(<vscale x 2 x double> %a)
   ret double %res
@@ -203,6 +215,7 @@ define half @fmaximumv_nxv2f16(<vscale x 2 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmaximum.nxv2f16(<vscale x 2 x half> %a)
   ret half %res
@@ -213,6 +226,7 @@ define half @fmaximumv_nxv4f16(<vscale x 4 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %a)
   ret half %res
@@ -223,6 +237,7 @@ define half @fmaximumv_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmaximum.nxv8f16(<vscale x 8 x half> %a)
   ret half %res
@@ -233,6 +248,7 @@ define float @fmaximumv_nxv2f32(<vscale x 2 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmaximum.nxv2f32(<vscale x 2 x float> %a)
   ret float %res
@@ -243,6 +259,7 @@ define float @fmaximumv_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fmaximum.nxv4f32(<vscale x 4 x float> %a)
   ret float %res
@@ -253,6 +270,7 @@ define double @fmaximumv_nxv2f64(<vscale x 2 x double> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fmaximum.nxv2f64(<vscale x 2 x double> %a)
   ret double %res
@@ -265,6 +283,7 @@ define half @fminimumv_nxv2f16(<vscale x 2 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fminimum.nxv2f16(<vscale x 2 x half> %a)
   ret half %res
@@ -275,6 +294,7 @@ define half @fminimumv_nxv4f16(<vscale x 4 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %a)
   ret half %res
@@ -285,6 +305,7 @@ define half @fminimumv_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fminimum.nxv8f16(<vscale x 8 x half> %a)
   ret half %res
@@ -295,6 +316,7 @@ define float @fminimumv_nxv2f32(<vscale x 2 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fminimum.nxv2f32(<vscale x 2 x float> %a)
   ret float %res
@@ -305,6 +327,7 @@ define float @fminimumv_nxv4f32(<vscale x 4 x float> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fminv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.vector.reduce.fminimum.nxv4f32(<vscale x 4 x float> %a)
   ret float %res
@@ -315,6 +338,7 @@ define double @fminimumv_nxv2f64(<vscale x 2 x double> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    fminv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fminimum.nxv2f64(<vscale x 2 x double> %a)
   ret double %res
@@ -327,6 +351,7 @@ define float @fadd_reduct_reassoc_v4v8f32(<vscale x 4 x float> %a, <vscale x 8 x
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
 ; CHECK-NEXT:    faddv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %r1 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.0, <vscale x 4 x float> %a)
   %r2 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float -0.0, <vscale x 8 x float> %b)

diff --git a/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll b/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll
index b8d7ecff02d115..a748cf732e0906 100644
--- a/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-i1-add-reduce.ll
@@ -5,6 +5,7 @@ define i8 @uaddv_zexti8_nxv16i1(<vscale x 16 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti8_nxv16i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.b
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 16 x i1> %v to <vscale x 16 x i8>
@@ -16,6 +17,7 @@ define i8 @uaddv_zexti8_nxv8i1(<vscale x 8 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti8_nxv8i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 8 x i1> %v to <vscale x 8 x i8>
@@ -27,6 +29,7 @@ define i16 @uaddv_zexti16_nxv8i1(<vscale x 8 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti16_nxv8i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.h
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 8 x i1> %v to <vscale x 8 x i16>
@@ -38,6 +41,7 @@ define i8 @uaddv_zexti8_nxv4i1(<vscale x 4 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti8_nxv4i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 4 x i1> %v to <vscale x 4 x i8>
@@ -49,6 +53,7 @@ define i16 @uaddv_zexti16_nxv4i1(<vscale x 4 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti16_nxv4i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 4 x i1> %v to <vscale x 4 x i16>
@@ -60,6 +65,7 @@ define i32 @uaddv_zexti32_nxv4i1(<vscale x 4 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti32_nxv4i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.s
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 4 x i1> %v to <vscale x 4 x i32>
@@ -71,6 +77,7 @@ define i8 @uaddv_zexti8_nxv2i1(<vscale x 2 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti8_nxv2i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 2 x i1> %v to <vscale x 2 x i8>
@@ -82,6 +89,7 @@ define i16 @uaddv_zexti16_nxv2i1(<vscale x 2 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti16_nxv2i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 2 x i1> %v to <vscale x 2 x i16>
@@ -93,6 +101,7 @@ define i32 @uaddv_zexti32_nxv2i1(<vscale x 2 x i1> %v) {
 ; CHECK-LABEL: uaddv_zexti32_nxv2i1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cntp x0, p0, p0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %3 = zext <vscale x 2 x i1> %v to <vscale x 2 x i32>

diff --git a/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll b/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll
index ac112aa170f290..73bbee094827e7 100644
--- a/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll
+++ b/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll
@@ -29,6 +29,7 @@ define <vscale x 8 x i16> @eorv_zero_fill(<vscale x 8 x i1> %pg, <vscale x 8 x i
 define <vscale x 2 x double> @fadda_zero_fill(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: fadda_zero_fill:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
 ; CHECK-NEXT:    ret
   %t1 = call double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a)

diff --git a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
index d1c559736ee2be..433ddbd4a261b2 100644
--- a/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-index-const-step-vector.ll
@@ -7,6 +7,7 @@ define <16 x i8> @v16i8() #0 {
 ; CHECK-LABEL: v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
 }
@@ -15,6 +16,7 @@ define <8 x i16> @v8i16() #0 {
 ; CHECK-LABEL: v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
 }
@@ -23,6 +25,7 @@ define <4 x i32> @v4i32() #0 {
 ; CHECK-LABEL: v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 }
@@ -31,6 +34,7 @@ define <2 x i64> @v2i64() #0 {
 ; CHECK-LABEL: v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <2 x i64> <i64 0, i64 1>
 }
@@ -41,6 +45,7 @@ define <8 x i8> @v8i8() #0 {
 ; CHECK-LABEL: v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   ret <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
 }
@@ -49,6 +54,7 @@ define <4 x i16> @v4i16() #0 {
 ; CHECK-LABEL: v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i16> <i16 0, i16 1, i16 2, i16 3>
 }
@@ -57,6 +63,7 @@ define <2 x i32> @v2i32() #0 {
 ; CHECK-LABEL: v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   ret <2 x i32> <i32 0, i32 1>
 }
@@ -68,6 +75,7 @@ define <4 x i32> @v4i32_non_zero_non_one() #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.s, #0, #2
 ; CHECK-NEXT:    orr z0.s, z0.s, #0x1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 }
@@ -77,6 +85,7 @@ define <4 x i32> @v4i32_neg_immediates() #0 {
 ; CHECK-LABEL: v4i32_neg_immediates:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.s, #-1, #-2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 -1, i32 -3, i32 -5, i32 -7>
 }
@@ -87,6 +96,7 @@ define <4 x i32> @v4i32_out_range_start() #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z0.s, #0, #1
 ; CHECK-NEXT:    add z0.s, z0.s, #16 // =0x10
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 16, i32 17, i32 18, i32 19>
 }
@@ -97,6 +107,7 @@ define <4 x i32> @v4i32_out_range_step() #0 {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #16 // =0x10
 ; CHECK-NEXT:    index z0.s, #0, w8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 0, i32 16, i32 32, i32 48>
 }
@@ -108,6 +119,7 @@ define <4 x i32> @v4i32_out_range_start_step() #0 {
 ; CHECK-NEXT:    mov w8, #16 // =0x10
 ; CHECK-NEXT:    index z0.s, #0, w8
 ; CHECK-NEXT:    add z0.s, z0.s, #16 // =0x10
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   ret <4 x i32> <i32 16, i32 32, i32 48, i32 64>
 }

diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
index 608c5ac4fb6745..7344964f13bbad 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll
@@ -82,6 +82,7 @@ define <vscale x 8 x bfloat> @test_lane0_8xbf16(<vscale x 8 x bfloat> %a, bfloat
 ; CHECK-LABEL: test_lane0_8xbf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl1
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 0
@@ -246,6 +247,7 @@ define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a,
 define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 8 x half> undef, half %a, i32 0
   ret <vscale x 8 x half> %b
@@ -254,6 +256,7 @@ define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
 define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 4 x half> undef, half %a, i32 0
   ret <vscale x 4 x half> %b
@@ -262,6 +265,7 @@ define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
 define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 2 x half> undef, half %a, i32 0
   ret <vscale x 2 x half> %b
@@ -270,6 +274,7 @@ define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
 define <vscale x 8 x bfloat> @test_insert_into_undef_nxv8bf16(bfloat %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv8bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 8 x bfloat> undef, bfloat %a, i32 0
   ret <vscale x 8 x bfloat> %b
@@ -278,6 +283,7 @@ define <vscale x 8 x bfloat> @test_insert_into_undef_nxv8bf16(bfloat %a) {
 define <vscale x 4 x bfloat> @test_insert_into_undef_nxv4bf16(bfloat %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv4bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 4 x bfloat> undef, bfloat %a, i32 0
   ret <vscale x 4 x bfloat> %b
@@ -286,6 +292,7 @@ define <vscale x 4 x bfloat> @test_insert_into_undef_nxv4bf16(bfloat %a) {
 define <vscale x 2 x bfloat> @test_insert_into_undef_nxv2bf16(bfloat %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv2bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 2 x bfloat> undef, bfloat %a, i32 0
   ret <vscale x 2 x bfloat> %b
@@ -294,6 +301,7 @@ define <vscale x 2 x bfloat> @test_insert_into_undef_nxv2bf16(bfloat %a) {
 define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 4 x float> undef, float %a, i32 0
   ret <vscale x 4 x float> %b
@@ -302,6 +310,7 @@ define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
 define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 2 x float> undef, float %a, i32 0
   ret <vscale x 2 x float> %b
@@ -310,6 +319,7 @@ define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
 define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
 ; CHECK-LABEL: test_insert_into_undef_nxv2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ret
   %b = insertelement <vscale x 2 x double> undef, double %a, i32 0
   ret <vscale x 2 x double> %b
@@ -439,6 +449,7 @@ define <vscale x 2 x i1> @test_predicate_insert_2xi1_immediate (<vscale x 2 x i1
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    mov z0.d, p0/m, x0
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z0.d, z0.d, #0x1
@@ -509,6 +520,7 @@ define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1
 ; CHECK-NEXT:    mov w8, w1
 ; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    cmpeq p2.d, p1/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
 ; CHECK-NEXT:    mov z0.d, p2/m, x0

diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index c207738d9b6562..5d1d7cf65c09e4 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -5,6 +5,7 @@ define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i6
 ; CHECK-LABEL: insert_v2i64_nxv2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
   %retval = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 0)
@@ -38,6 +39,7 @@ define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i3
 ; CHECK-LABEL: insert_v4i32_nxv4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
 ; CHECK-NEXT:    ret
   %retval = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
@@ -71,6 +73,7 @@ define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i1
 ; CHECK-LABEL: insert_v8i16_nxv8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %retval = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 0)
@@ -104,6 +107,7 @@ define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i
 ; CHECK-LABEL: insert_v16i8_nxv16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.b, p0/m, z1.b
 ; CHECK-NEXT:    ret
   %retval = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 0)
@@ -191,6 +195,7 @@ define void @insert_v2i64_nxv16i64(<2 x i64> %sv0, <2 x i64> %sv1, ptr %out) uwt
 ; CHECK-NEXT:    addvl sp, sp, #-4
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
 ; CHECK-NEXT:    str q1, [sp, #32]
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp, #3, mul vl]
@@ -531,6 +536,7 @@ define <vscale x 8 x bfloat> @insert_nxv8bf16_v8bf16(<vscale x 8 x bfloat> %sv0,
 ; CHECK-LABEL: insert_nxv8bf16_v8bf16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
 ; CHECK-NEXT:    ret
   %v0 = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> %sv0, <8 x bfloat> %v1, i64 0)

diff --git a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
index 9ca0ae70fa6f8c..8c1b5225b7f257 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll
@@ -147,6 +147,7 @@ define i8 @uaddv_nxv16i8(<vscale x 16 x i8> %a) {
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> %a)
   ret i8 %res
@@ -158,6 +159,7 @@ define i16 @uaddv_nxv8i16(<vscale x 8 x i16> %a) {
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %a)
   ret i16 %res
@@ -169,6 +171,7 @@ define i32 @uaddv_nxv4i32(<vscale x 4 x i32> %a) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %a)
   ret i32 %res
@@ -420,6 +423,7 @@ define i8 @uaddv_nxv12i8(<vscale x 12 x i8> %a) {
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i8 @llvm.vector.reduce.add.nxv12i8(<vscale x 12 x i8> %a)
   ret i8 %res

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll
index 581b1ed7a989a2..02d172fbc9dbfd 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll
@@ -14,7 +14,9 @@ define i32 @incb(i32 %a) {
 ;
 ; CHECK-LABEL: incb:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incb x0, vl5
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 5)
   %conv = trunc i64 %cnt to i32
@@ -31,7 +33,9 @@ define i32 @incb_mul(i32 %a) {
 ;
 ; CHECK-LABEL: incb_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incb x0, vl8, mul #4
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 8)
   %conv = trunc i64 %cnt to i32
@@ -53,7 +57,9 @@ define i32 @decb(i32 %a) {
 ;
 ; CHECK-LABEL: decb:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decb x0, vl6
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
   %conv = trunc i64 %cnt to i32
@@ -70,7 +76,9 @@ define i32 @decb_mul(i32 %a) {
 ;
 ; CHECK-LABEL: decb_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decb x0, vl6, mul #8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntb(i32 6)
   %conv = trunc i64 %cnt to i32
@@ -90,7 +98,9 @@ define i32 @inch(i32 %a) {
 ;
 ; CHECK-LABEL: inch:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    inch x0, #16
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 16)
   %conv = trunc i64 %cnt to i32
@@ -108,7 +118,9 @@ define i32 @inch_mul(i32 %a) {
 ;
 ; CHECK-LABEL: inch_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    inch x0, vl8, mul #5
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 8)
   %conv = trunc i64 %cnt to i32
@@ -130,7 +142,9 @@ define i32 @dech(i32 %a) {
 ;
 ; CHECK-LABEL: dech:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    dech x0, vl1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 1)
   %conv = trunc i64 %cnt to i32
@@ -148,7 +162,9 @@ define i32 @dech_mul(i32 %a) {
 ;
 ; CHECK-LABEL: dech_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    dech x0, vl16, mul #7
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cnth(i32 9)
   %conv = trunc i64 %cnt to i32
@@ -170,7 +186,9 @@ define i32 @incw(i32 %a) {
 ;
 ; CHECK-LABEL: incw:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incw x0, #16
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 16)
   %conv = trunc i64 %cnt to i32
@@ -188,7 +206,9 @@ define i32 @incw_mul(i32 %a) {
 ;
 ; CHECK-LABEL: incw_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incw x0, vl32, mul #12
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 10)
   %conv = trunc i64 %cnt to i32
@@ -210,7 +230,9 @@ define i32 @decw(i32 %a) {
 ;
 ; CHECK-LABEL: decw:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decw x0, vl64
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 11)
   %conv = trunc i64 %cnt to i32
@@ -227,7 +249,9 @@ define i32 @decw_mul(i32 %a) {
 ;
 ; CHECK-LABEL: decw_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decw x0, vl128, mul #16
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntw(i32 12)
   %conv = trunc i64 %cnt to i32
@@ -247,7 +271,9 @@ define i32 @incd(i32 %base) {
 ;
 ; CHECK-LABEL: incd:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incd x0, vl64
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = tail call i64 @llvm.aarch64.sve.cntd(i32 11)
   %conv = trunc i64 %cnt to i32
@@ -265,7 +291,9 @@ define i32 @incd_mul(i32 %base) {
 ;
 ; CHECK-LABEL: incd_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incd x0, vl64, mul #15
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = tail call i64 @llvm.aarch64.sve.cntd(i32 11)
   %conv = trunc i64 %cnt to i32
@@ -287,7 +315,9 @@ define i32 @decd(i32 %a) {
 ;
 ; CHECK-LABEL: decd:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decd x0, #16
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntd(i32 16)
   %conv = trunc i64 %cnt to i32
@@ -305,7 +335,9 @@ define i32 @decd_mul(i32 %a) {
 ;
 ; CHECK-LABEL: decd_mul:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decd x0, vl2, mul #9
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %cnt = call i64 @llvm.aarch64.sve.cntd(i32 2)
   %conv = trunc i64 %cnt to i32

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
index 83f1fc5d14a9e8..bf113a7cabc15a 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
@@ -82,6 +82,7 @@ define <vscale x 2 x i64> @dup_imm_i64(i64 %b) {
 define <vscale x 8 x half> @dup_f16(half %b) {
 ; CHECK-LABEL: dup_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half %b)
@@ -91,6 +92,7 @@ define <vscale x 8 x half> @dup_f16(half %b) {
 define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
 ; CHECK-LABEL: dup_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
@@ -109,6 +111,7 @@ define <vscale x 8 x half> @dup_imm_f16(half %b) {
 define <vscale x 4 x float> @dup_f32(float %b) {
 ; CHECK-LABEL: dup_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float %b)
@@ -127,6 +130,7 @@ define <vscale x 4 x float> @dup_imm_f32(float %b) {
 define <vscale x 2 x double> @dup_f64(double %b) {
 ; CHECK-LABEL: dup_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double %b)
@@ -145,7 +149,7 @@ define <vscale x 2 x double> @dup_imm_f64(double %b) {
 define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
 ; CHECK-LABEL: dup_fmov_imm_f32_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
+; CHECK-NEXT:    mov w8, #1109917696
 ; CHECK-NEXT:    mov z0.s, w8
 ; CHECK-NEXT:    ret
   %out = tail call <vscale x 2 x float> @llvm.aarch64.sve.dup.x.nxv2f32(float 4.200000e+01)
@@ -155,7 +159,7 @@ define <vscale x 2 x float> @dup_fmov_imm_f32_2() {
 define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
 ; CHECK-LABEL: dup_fmov_imm_f32_4:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1109917696 // =0x42280000
+; CHECK-NEXT:    mov w8, #1109917696
 ; CHECK-NEXT:    mov z0.s, w8
 ; CHECK-NEXT:    ret
   %out = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 4.200000e+01)
@@ -165,7 +169,7 @@ define <vscale x 4 x float> @dup_fmov_imm_f32_4() {
 define <vscale x 2 x double> @dup_fmov_imm_f64_2() {
 ; CHECK-LABEL: dup_fmov_imm_f64_2:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4631107791820423168 // =0x4045000000000000
+; CHECK-NEXT:    mov x8, #4631107791820423168
 ; CHECK-NEXT:    mov z0.d, x8
 ; CHECK-NEXT:    ret
   %out = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 4.200000e+01)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
index 95ec7b2b396599..95de90078d3416 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-reduce.ll
@@ -8,7 +8,9 @@
 define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fadda_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    fadda h0, p0, h0, z1.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fadda.nxv8f16(<vscale x 8 x i1> %pg,
                                                    half %init,
@@ -19,7 +21,9 @@ define half @fadda_f16(<vscale x 8 x i1> %pg, half %init, <vscale x 8 x half> %a
 define float @fadda_f32(<vscale x 4 x i1> %pg, float %init, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fadda_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    fadda s0, p0, s0, z1.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fadda.nxv4f32(<vscale x 4 x i1> %pg,
                                                     float %init,
@@ -30,7 +34,9 @@ define float @fadda_f32(<vscale x 4 x i1> %pg, float %init, <vscale x 4 x float>
 define double @fadda_f64(<vscale x 2 x i1> %pg, double %init, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fadda_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fadda.nxv2f64(<vscale x 2 x i1> %pg,
                                                      double %init,
@@ -46,6 +52,7 @@ define half @faddv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: faddv_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    faddv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.faddv.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
@@ -56,6 +63,7 @@ define float @faddv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: faddv_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    faddv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
@@ -66,6 +74,7 @@ define double @faddv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: faddv_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    faddv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)
@@ -80,6 +89,7 @@ define half @fmaxnmv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fmaxnmv_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fmaxnmv.nxv8f16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x half> %a)
@@ -90,6 +100,7 @@ define float @fmaxnmv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fmaxnmv_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fmaxnmv.nxv4f32(<vscale x 4 x i1> %pg,
                                                       <vscale x 4 x float> %a)
@@ -100,6 +111,7 @@ define double @fmaxnmv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fmaxnmv_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fmaxnmv.nxv2f64(<vscale x 2 x i1> %pg,
                                                        <vscale x 2 x double> %a)
@@ -114,6 +126,7 @@ define half @fmaxv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fmaxv_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fmaxv.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
@@ -124,6 +137,7 @@ define float @fmaxv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fmaxv_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fmaxv.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
@@ -134,6 +148,7 @@ define double @fmaxv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fmaxv_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fmaxv.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)
@@ -148,6 +163,7 @@ define half @fminnmv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fminnmv_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fminnmv.nxv8f16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x half> %a)
@@ -158,6 +174,7 @@ define float @fminnmv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fminnmv_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fminnmv.nxv4f32(<vscale x 4 x i1> %pg,
                                                       <vscale x 4 x float> %a)
@@ -168,6 +185,7 @@ define double @fminnmv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fminnmv_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fminnmv.nxv2f64(<vscale x 2 x i1> %pg,
                                                        <vscale x 2 x double> %a)
@@ -182,6 +200,7 @@ define half @fminv_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
 ; CHECK-LABEL: fminv_f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.aarch64.sve.fminv.nxv8f16(<vscale x 8 x i1> %pg,
                                                    <vscale x 8 x half> %a)
@@ -192,6 +211,7 @@ define float @fminv_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: fminv_f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call float @llvm.aarch64.sve.fminv.nxv4f32(<vscale x 4 x i1> %pg,
                                                     <vscale x 4 x float> %a)
@@ -202,6 +222,7 @@ define double @fminv_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
 ; CHECK-LABEL: fminv_f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fminv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.aarch64.sve.fminv.nxv2f64(<vscale x 2 x i1> %pg,
                                                      <vscale x 2 x double> %a)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
index 3e6f5a0df6fa17..3096b509884765 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -596,6 +596,8 @@ define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
 define dso_local <vscale x 4 x float> @dupq_f32_repeat_complex(float %x, float %y) {
 ; CHECK-LABEL: dupq_f32_repeat_complex:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    ret
@@ -611,6 +613,8 @@ define dso_local <vscale x 4 x float> @dupq_f32_repeat_complex(float %x, float %
 define dso_local <vscale x 8 x half> @dupq_f16_repeat_complex(half %x, half %y) {
 ; CHECK-LABEL: dupq_f16_repeat_complex:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $q1
 ; CHECK-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    ret
@@ -631,6 +635,8 @@ define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ;
 ; SVE2-LABEL: ext_i8:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #255
 ; SVE2-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a,
@@ -647,6 +653,8 @@ define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ;
 ; SVE2-LABEL: ext_i16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #0
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a,
@@ -663,6 +671,8 @@ define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ;
 ; SVE2-LABEL: ext_i32:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #4
 ; SVE2-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a,
@@ -679,6 +689,8 @@ define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ;
 ; SVE2-LABEL: ext_i64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #16
 ; SVE2-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a,
@@ -695,6 +707,8 @@ define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x b
 ;
 ; SVE2-LABEL: ext_bf16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #6
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a,
@@ -711,6 +725,8 @@ define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half>
 ;
 ; SVE2-LABEL: ext_f16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #6
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a,
@@ -727,6 +743,8 @@ define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x floa
 ;
 ; SVE2-LABEL: ext_f32:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #16
 ; SVE2-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a,
@@ -743,6 +761,8 @@ define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x do
 ;
 ; SVE2-LABEL: ext_f64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    ext z0.b, { z0.b, z1.b }, #40
 ; SVE2-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a,
@@ -1134,6 +1154,8 @@ define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8>
 ;
 ; SVE2-LABEL: splice_i8:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.b, p0, { z0.b, z1.b }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg,
@@ -1150,6 +1172,8 @@ define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16>
 ;
 ; SVE2-LABEL: splice_i16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg,
@@ -1166,6 +1190,8 @@ define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32>
 ;
 ; SVE2-LABEL: splice_i32:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg,
@@ -1182,6 +1208,8 @@ define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
 ;
 ; SVE2-LABEL: splice_i64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.d, p0, { z0.d, z1.d }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg,
@@ -1198,6 +1226,8 @@ define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x b
 ;
 ; SVE2-LABEL: splice_bf16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg,
@@ -1214,6 +1244,8 @@ define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half
 ;
 ; SVE2-LABEL: splice_f16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg,
@@ -1230,6 +1262,8 @@ define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x flo
 ;
 ; SVE2-LABEL: splice_f32:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg,
@@ -1246,6 +1280,8 @@ define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x do
 ;
 ; SVE2-LABEL: splice_f64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; SVE2-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; SVE2-NEXT:    splice z0.d, p0, { z0.d, z1.d }
 ; SVE2-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg,

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
index a51c5de893f258..5127fa7e934803 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -153,6 +153,7 @@ define <vscale x 16 x i1> @chained_reinterpret() {
 define <vscale x 16 x i1> @reinterpret_scalar_bool_h(i1 %x){
 ; CHECK-LABEL: reinterpret_scalar_bool_h:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    ret
@@ -165,6 +166,7 @@ define <vscale x 16 x i1> @reinterpret_scalar_bool_h(i1 %x){
 define <vscale x 16 x i1> @reinterpret_scalar_bool_s(i1 %x){
 ; CHECK-LABEL: reinterpret_scalar_bool_s:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    ret
@@ -177,6 +179,7 @@ define <vscale x 16 x i1> @reinterpret_scalar_bool_s(i1 %x){
 define <vscale x 16 x i1> @reinterpret_scalar_bool_q(i1 %x){
 ; CHECK-LABEL: reinterpret_scalar_bool_q:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
index bedef200feeca1..310fba47780258 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -173,6 +173,7 @@ define <vscale x 2 x i64> @insr_i64(<vscale x 2 x i64> %a, i64 %b) {
 define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
 ; CHECK-LABEL: insr_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
 ; CHECK-NEXT:    insr z0.h, h1
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> %a, half %b)
@@ -182,6 +183,7 @@ define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
 define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) #0 {
 ; CHECK-LABEL: insr_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
 ; CHECK-NEXT:    insr z0.h, h1
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %b)
@@ -191,6 +193,7 @@ define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) #0
 define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
 ; CHECK-LABEL: insr_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $z1
 ; CHECK-NEXT:    insr z0.s, s1
 ; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> %a, float %b)
@@ -200,6 +203,7 @@ define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
 define <vscale x 2 x double> @insr_f64(<vscale x 2 x double> %a, double %b) {
 ; CHECK-LABEL: insr_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    insr z0.d, d1
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double> %a, double %b)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
index 2beda0d664f5ff..f4ab2025f71822 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqdec.ll
@@ -92,7 +92,9 @@ define <vscale x 2 x i64> @sqdecp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %
 define i32 @sqdecb_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecb_n32_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecb x0, w0, vl3, mul #4
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecb.n32(i32 %a, i32 3, i32 4)
   ret i32 %out
@@ -101,6 +103,7 @@ define i32 @sqdecb_n32_i32(i32 %a) {
 define i64 @sqdecb_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecb_n32_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecb x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecb.n32(i32 %a, i32 3, i32 4)
@@ -125,7 +128,9 @@ define i64 @sqdecb_n64(i64 %a) {
 define i32 @sqdech_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdech_n32_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdech x0, w0, vl5, mul #6
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdech.n32(i32 %a, i32 5, i32 6)
   ret i32 %out
@@ -134,6 +139,7 @@ define i32 @sqdech_n32_i32(i32 %a) {
 define i64 @sqdech_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdech_n32_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdech x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdech.n32(i32 %a, i32 3, i32 4)
@@ -158,7 +164,9 @@ define i64 @sqdech_n64(i64 %a) {
 define i32 @sqdecw_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecw_n32_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecw x0, w0, vl7, mul #8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecw.n32(i32 %a, i32 7, i32 8)
   ret i32 %out
@@ -167,6 +175,7 @@ define i32 @sqdecw_n32_i32(i32 %a) {
 define i64 @sqdecw_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecw_n32_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecw x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecw.n32(i32 %a, i32 3, i32 4)
@@ -191,7 +200,9 @@ define i64 @sqdecw_n64(i64 %a) {
 define i32 @sqdecd_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqdecd_n32_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecd x0, w0, vl16, mul #10
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecd.n32(i32 %a, i32 9, i32 10)
   ret i32 %out
@@ -200,6 +211,7 @@ define i32 @sqdecd_n32_i32(i32 %a) {
 define i64 @sqdecd_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqdecd_n32_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecd x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecd.n32(i32 %a, i32 3, i32 4)
@@ -224,7 +236,9 @@ define i64 @sqdecd_n64(i64 %a) {
 define i32 @sqdecp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b8_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.b, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   ret i32 %out
@@ -233,6 +247,7 @@ define i32 @sqdecp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 define i64 @sqdecp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b8_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.b, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
@@ -244,7 +259,9 @@ define i64 @sqdecp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 define i32 @sqdecp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b16_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.h, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   ret i32 %out
@@ -253,6 +270,7 @@ define i32 @sqdecp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 define i64 @sqdecp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b16_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.h, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
@@ -264,7 +282,9 @@ define i64 @sqdecp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 define i32 @sqdecp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b32_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.s, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   ret i32 %out
@@ -273,6 +293,7 @@ define i32 @sqdecp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 define i64 @sqdecp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b32_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.s, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
@@ -284,7 +305,9 @@ define i64 @sqdecp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 define i32 @sqdecp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b64_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.d, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   ret i32 %out
@@ -293,6 +316,7 @@ define i32 @sqdecp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 define i64 @sqdecp_n32_b64_i64(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqdecp_n32_b64_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqdecp x0, p0.d, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
index 449ad444198b75..aa3403b714cbcc 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-sqinc.ll
@@ -92,7 +92,9 @@ define <vscale x 2 x i64> @sqincp_b64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %
 define i32 @sqincb_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincb_n32_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincb x0, w0, vl3, mul #4
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincb.n32(i32 %a, i32 3, i32 4)
   ret i32 %out
@@ -101,6 +103,7 @@ define i32 @sqincb_n32_i32(i32 %a) {
 define i64 @sqincb_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincb_n32_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincb x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincb.n32(i32 %a, i32 3, i32 4)
@@ -125,7 +128,9 @@ define i64 @sqincb_n64(i64 %a) {
 define i32 @sqinch_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqinch_n32_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqinch x0, w0, vl5, mul #6
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqinch.n32(i32 %a, i32 5, i32 6)
   ret i32 %out
@@ -134,6 +139,7 @@ define i32 @sqinch_n32_i32(i32 %a) {
 define i64 @sqinch_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqinch_n32_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqinch x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqinch.n32(i32 %a, i32 3, i32 4)
@@ -158,7 +164,9 @@ define i64 @sqinch_n64(i64 %a) {
 define i32 @sqincw_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincw_n32_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincw x0, w0, vl7, mul #8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincw.n32(i32 %a, i32 7, i32 8)
   ret i32 %out
@@ -167,6 +175,7 @@ define i32 @sqincw_n32_i32(i32 %a) {
 define i64 @sqincw_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincw_n32_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincw x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincw.n32(i32 %a, i32 3, i32 4)
@@ -191,7 +200,9 @@ define i64 @sqincw_n64(i64 %a) {
 define i32 @sqincd_n32_i32(i32 %a) {
 ; CHECK-LABEL: sqincd_n32_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincd x0, w0, vl16, mul #10
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincd.n32(i32 %a, i32 9, i32 10)
   ret i32 %out
@@ -200,6 +211,7 @@ define i32 @sqincd_n32_i32(i32 %a) {
 define i64 @sqincd_n32_i64(i32 %a) {
 ; CHECK-LABEL: sqincd_n32_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincd x0, w0, vl3, mul #4
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincd.n32(i32 %a, i32 3, i32 4)
@@ -224,7 +236,9 @@ define i64 @sqincd_n64(i64 %a) {
 define i32 @sqincp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b8_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.b, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
   ret i32 %out
@@ -233,6 +247,7 @@ define i32 @sqincp_n32_b8_i32(i32 %a, <vscale x 16 x i1> %b) {
 define i64 @sqincp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b8_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.b, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32 %a, <vscale x 16 x i1> %b)
@@ -244,7 +259,9 @@ define i64 @sqincp_n32_b8_i64(i32 %a, <vscale x 16 x i1> %b) {
 define i32 @sqincp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b16_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.h, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
   ret i32 %out
@@ -253,6 +270,7 @@ define i32 @sqincp_n32_b16_i32(i32 %a, <vscale x 8 x i1> %b) {
 define i64 @sqincp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b16_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.h, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32 %a, <vscale x 8 x i1> %b)
@@ -264,7 +282,9 @@ define i64 @sqincp_n32_b16_i64(i32 %a, <vscale x 8 x i1> %b) {
 define i32 @sqincp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b32_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.s, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
   ret i32 %out
@@ -273,6 +293,7 @@ define i32 @sqincp_n32_b32_i32(i32 %a, <vscale x 4 x i1> %b) {
 define i64 @sqincp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b32_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.s, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32 %a, <vscale x 4 x i1> %b)
@@ -284,7 +305,9 @@ define i64 @sqincp_n32_b32_i64(i32 %a, <vscale x 4 x i1> %b) {
 define i32 @sqincp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b64_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.d, w0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)
   ret i32 %out
@@ -293,6 +316,7 @@ define i32 @sqincp_n32_b64_i32(i32 %a, <vscale x 2 x i1> %b) {
 define i64 @sqincp_n32_b64_i64(i32 %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: sqincp_n32_b64_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sqincp x0, p0.d, w0
 ; CHECK-NEXT:    ret
   %out = call i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32 %a, <vscale x 2 x i1> %b)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
index b8d171dac45e2f..579358ecb887ef 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll
@@ -15,6 +15,8 @@
 define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 2, i64 0
@@ -28,7 +30,9 @@ define void @st2b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    rdvl x8, #3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 3, i64 0
@@ -42,7 +46,9 @@ define void @st2b_i8_invalid_imm_not_multiple_of_2(<vscale x 16 x i8> %v0, <vsca
 define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    rdvl x8, #-18
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -18, i64 0
@@ -56,7 +62,9 @@ define void @st2b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    rdvl x8, #16
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 16, i64 0
@@ -70,6 +78,8 @@ define void @st2b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm_lower_bound:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16, i64 0
@@ -83,6 +93,8 @@ define void @st2b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8_valid_imm_upper_bound:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14, i64 0
@@ -100,6 +112,8 @@ define void @st2b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 2, i64 0
@@ -113,6 +127,8 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, #2, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 2, i64 0
@@ -130,6 +146,8 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, #4, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 4, i64 0
@@ -143,6 +161,8 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, #6, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 6, i64 0
@@ -160,6 +180,8 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, #8, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 8, i64 0
@@ -173,6 +195,8 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, #10, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 10, i64 0
@@ -190,6 +214,9 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #3, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 3, i64 0
@@ -204,7 +231,10 @@ define void @st3b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #4
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 4, i64 0
@@ -219,7 +249,10 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01(<vscale x 16 x i8> %v0, <v
 define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #5
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 5, i64 0
@@ -234,7 +267,10 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02(<vscale x 16 x i8> %v0, <v
 define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #-27
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -27, i64 0
@@ -249,7 +285,10 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    rdvl x8, #24
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 24, i64 0
@@ -264,6 +303,9 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm_lower_bound:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24, i64 0
@@ -278,6 +320,9 @@ define void @st3b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8_valid_imm_upper_bound:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21, i64 0
@@ -296,6 +341,9 @@ define void @st3b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, #6, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 6, i64 0
@@ -310,6 +358,9 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, #9, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 9, i64 0
@@ -328,6 +379,9 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, #12, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 12, i64 0
@@ -342,6 +396,9 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, #15, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 15, i64 0
@@ -360,6 +417,9 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, #18, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 18, i64 0
@@ -374,6 +434,9 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 -3, i64 0
@@ -392,6 +455,10 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #4, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 4, i64 0
@@ -407,7 +474,11 @@ define void @st4b_i8_valid_imm(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <
 define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    rdvl x8, #5
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 5, i64 0
@@ -423,7 +494,11 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01(<vscale x 16 x i8> %v0, <v
 define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    rdvl x8, #6
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 6, i64 0
@@ -439,7 +514,11 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02(<vscale x 16 x i8> %v0, <v
 define void @st4b_i8_invalid_imm_not_multiple_of_4_03(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    rdvl x8, #7
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 7, i64 0
@@ -457,8 +536,12 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    mov x9, #-576 // =0xfffffffffffffdc0
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9)
@@ -480,8 +563,12 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    rdvl x8, #1
 ; CHECK-NEXT:    mov w9, #512 // =0x200
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    mul x8, x8, x9
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x8]
 ; CHECK-NEXT:    ret
 ; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1)
@@ -501,6 +588,10 @@ define void @st4b_i8_invalid_imm_out_of_upper_bound(<vscale x 16 x i8> %v0, <vsc
 define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm_lower_bound:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32, i64 0
@@ -516,6 +607,10 @@ define void @st4b_i8_valid_imm_lower_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8_valid_imm_upper_bound:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28, i64 0
@@ -535,6 +630,10 @@ define void @st4b_i8_valid_imm_upper_bound(<vscale x 16 x i8> %v0, <vscale x 16
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, #8, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 8, i64 0
@@ -550,6 +649,10 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, #12, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 12, i64 0
@@ -569,6 +672,10 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, #16, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 16, i64 0
@@ -584,6 +691,10 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, #20, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 20, i64 0
@@ -603,6 +714,10 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, #24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 24, i64 0
@@ -618,6 +733,10 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28, i64 0

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
index 8a7a5b7be34a90..c8fc8d7a70cc68 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll
@@ -9,6 +9,8 @@
 define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2b_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, ptr %addr, i64 %offset
@@ -26,6 +28,8 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2h_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, ptr %addr, i64 %offset
@@ -39,6 +43,8 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2h_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, ptr %addr, i64 %offset
@@ -56,6 +62,8 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2w_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, ptr %addr, i64 %offset
@@ -69,6 +77,8 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2w_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, ptr %addr, i64 %offset
@@ -86,6 +96,8 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2d_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, ptr %addr, i64 %offset
@@ -99,6 +111,8 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2d_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, ptr %addr, i64 %offset
@@ -116,6 +130,9 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3b_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, ptr %addr, i64 %offset
@@ -134,6 +151,9 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3h_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, ptr %addr, i64 %offset
@@ -148,6 +168,9 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3h_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, ptr %addr, i64 %offset
@@ -166,6 +189,9 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3w_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, ptr %addr, i64 %offset
@@ -180,6 +206,9 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3w_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, ptr %addr, i64 %offset
@@ -198,6 +227,9 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3d_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, ptr %addr, i64 %offset
@@ -212,6 +244,9 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3d_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, ptr %addr, i64 %offset
@@ -230,6 +265,10 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4b_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0, x1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i8, ptr %addr, i64 %offset
@@ -249,6 +288,10 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4h_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i16, ptr %addr, i64 %offset
@@ -264,6 +307,10 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4h_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1]
 ; CHECK-NEXT:    ret
   %1 = getelementptr half, ptr %addr, i64 %offset
@@ -283,6 +330,10 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4w_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i32, ptr %addr, i64 %offset
@@ -298,6 +349,10 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4w_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2]
 ; CHECK-NEXT:    ret
   %1 = getelementptr float, ptr %addr, i64 %offset
@@ -317,6 +372,10 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4d_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i64, ptr %addr, i64 %offset
@@ -332,6 +391,10 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4d_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3]
 ; CHECK-NEXT:    ret
   %1 = getelementptr double, ptr %addr, i64 %offset

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index 5aa0fd7eb7e453..b09baa6bf7e0ac 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -9,6 +9,8 @@
 define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2b_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2b { z0.b, z1.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %v0,
@@ -25,6 +27,8 @@ define void @st2b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %v0,
@@ -37,6 +41,8 @@ define void @st2h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2h_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %v0,
@@ -49,6 +55,8 @@ define void @st2h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: st2h_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2h { z0.h, z1.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -65,6 +73,8 @@ define void @st2h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %v0,
@@ -77,6 +87,8 @@ define void @st2w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2w_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %v0,
@@ -93,6 +105,8 @@ define void @st2w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %v0,
@@ -105,6 +119,8 @@ define void @st2d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %v0,
@@ -117,6 +133,8 @@ define void @st2d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st2d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2d_ptr:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2d { z0.d, z1.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st2.nxv2p0(<vscale x 2 x ptr> %v0,
@@ -133,6 +151,9 @@ define void @st2d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x
 define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3b_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3b { z0.b - z2.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %v0,
@@ -150,6 +171,9 @@ define void @st3b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %v0,
@@ -163,6 +187,9 @@ define void @st3h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3h_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %v0,
@@ -176,6 +203,9 @@ define void @st3h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: st3h_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3h { z0.h - z2.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -193,6 +223,9 @@ define void @st3h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %v0,
@@ -206,6 +239,9 @@ define void @st3w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3w_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3w { z0.s - z2.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %v0,
@@ -223,6 +259,9 @@ define void @st3w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %v0,
@@ -236,6 +275,9 @@ define void @st3d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %v0,
@@ -249,6 +291,9 @@ define void @st3d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st3d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x ptr> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3d_ptr:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3d { z0.d - z2.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st3.nxv2p0(<vscale x 2 x ptr> %v0,
@@ -266,6 +311,10 @@ define void @st3d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x
 define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4b_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4b { z0.b - z3.b }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %v0,
@@ -284,6 +333,10 @@ define void @st4b_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 1
 define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %v0,
@@ -298,6 +351,10 @@ define void @st4h_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x
 define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4h_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %v0,
@@ -312,6 +369,10 @@ define void @st4h_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale
 define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) #0 {
 ; CHECK-LABEL: st4h_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4h { z0.h - z3.h }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> %v0,
@@ -330,6 +391,10 @@ define void @st4h_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vs
 define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %v0,
@@ -344,6 +409,10 @@ define void @st4w_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x
 define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4w_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4w { z0.s - z3.s }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %v0,
@@ -362,6 +431,10 @@ define void @st4w_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscal
 define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %v0,
@@ -376,6 +449,10 @@ define void @st4d_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x
 define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %v0,
@@ -390,6 +467,10 @@ define void @st4d_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vsc
 define void @st4d_ptr(<vscale x 2 x ptr> %v0, <vscale x 2 x ptr> %v1, <vscale x 2 x ptr> %v2, <vscale x 2 x ptr> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4d_ptr:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4d { z0.d - z3.d }, p0, [x0]
 ; CHECK-NEXT:    ret
   call void @llvm.aarch64.sve.st4.nxv2p0(<vscale x 2 x ptr> %v0,

diff --git a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
index a326da2b20f4a0..47758893ce7117 100644
--- a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll
@@ -11,10 +11,10 @@ declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x
 define void @foo1(ptr %outval, <vscale x 2 x i1> %pred, ptr %inptr) {
 ; CHECK-LABEL: foo1:
 ; CHECK: ld2d { z0.d, z1.d }, p0/z, [x1]
-; CHECK-NEXT: faddv d0, p0, z0.d
-; CHECK-NEXT: faddv d1, p0, z1.d
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: faddv d2, p0, z0.d
+; CHECK-NEXT: faddv d0, p0, z1.d
+; CHECK-NEXT: mov v2.d[1], v0.d[0]
+; CHECK-NEXT: str q2, [x0]
   %imagp = getelementptr inbounds %complex, ptr %outval, i64 0, i32 0, i32 1
   %1 = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> %pred, ptr nonnull %inptr)
   %2 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %1, 0

diff --git a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
index 0853f80a755407..bcb878ad744bb4 100644
--- a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll
@@ -10,6 +10,7 @@ define <4 x i32> @masked_load_v4i32(ptr %a, <4 x i1> %mask) nounwind {
 ; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
   %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> %mask, <4 x i32> undef), !nontemporal !0
   ret <4 x i32> %load
@@ -20,6 +21,7 @@ define void @masked_store_v4i32(<4 x i32> %x, ptr %a, <4 x i1> %mask) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    shl v1.4s, v1.4s, #31
 ; CHECK-NEXT:    cmlt v1.4s, v1.4s, #0
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0

diff --git a/llvm/test/CodeGen/AArch64/sve-pr62151.ll b/llvm/test/CodeGen/AArch64/sve-pr62151.ll
index 36594baa249164..5ed34f14a0b140 100644
--- a/llvm/test/CodeGen/AArch64/sve-pr62151.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pr62151.ll
@@ -7,6 +7,7 @@ define i32 @build_interpolation(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    mul v0.2s, v1.2s, v0.2s
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z2.s
 ; CHECK-NEXT:    mla v0.2s, v1.2s, v0.s[1]
 ; CHECK-NEXT:    fmov w0, s0

diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
index e60a3ece699cd8..8438e9d88f5de0 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
@@ -1064,8 +1064,9 @@ entry:
 define <vscale x 4 x float> @faddqr_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, float %y) {
 ; CHECK-LABEL: faddqr_v4f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.s, s2
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $z2
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z2.s, s2
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    fadd z1.s, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
@@ -1082,8 +1083,9 @@ entry:
 define <vscale x 8 x half> @faddqr_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, half %y) {
 ; CHECK-LABEL: faddqr_v8f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.h, h2
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z2.h, h2
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
 ; CHECK-NEXT:    fadd z1.h, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
@@ -1100,8 +1102,9 @@ entry:
 define <vscale x 4 x float> @fsubqr_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, float %y) {
 ; CHECK-LABEL: fsubqr_v4f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.s, s2
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $z2
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z2.s, s2
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    fsub z1.s, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
@@ -1118,8 +1121,9 @@ entry:
 define <vscale x 8 x half> @fsubqr_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, half %y) {
 ; CHECK-LABEL: fsubqr_v8f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.h, h2
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z2.h, h2
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
 ; CHECK-NEXT:    fsub z1.h, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h
@@ -1136,8 +1140,9 @@ entry:
 define <vscale x 4 x float> @fmulqr_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, float %y) {
 ; CHECK-LABEL: fmulqr_v4f32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.s, s2
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $z2
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z2.s, s2
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, z1.s
@@ -1154,8 +1159,9 @@ entry:
 define <vscale x 8 x half> @fmulqr_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, half %y) {
 ; CHECK-LABEL: fmulqr_v8f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov z2.h, h2
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z2.h, h2
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
 ; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, z1.h

diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll
index 2a1bfc4168af4b..b1270165556e67 100644
--- a/llvm/test/CodeGen/AArch64/sve-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-select.ll
@@ -4,6 +4,7 @@
 define <vscale x  1 x i8> @select_nxv1i8(i1 %cond, <vscale x  1 x i8> %a, <vscale x  1 x i8> %b) {
 ; CHECK-LABEL: select_nxv1i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.b, xzr, x8
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
@@ -15,6 +16,7 @@ define <vscale x  1 x i8> @select_nxv1i8(i1 %cond, <vscale x  1 x i8> %a, <vscal
 define <vscale x  16 x i8> @select_nxv16i8(i1 %cond, <vscale x  16 x i8> %a, <vscale x  16 x i8> %b) {
 ; CHECK-LABEL: select_nxv16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.b, xzr, x8
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
@@ -26,6 +28,7 @@ define <vscale x  16 x i8> @select_nxv16i8(i1 %cond, <vscale x  16 x i8> %a, <vs
 define <vscale x  1 x i16> @select_nxv1i16(i1 %cond, <vscale x  1 x i16> %a, <vscale x  1 x i16> %b) {
 ; CHECK-LABEL: select_nxv1i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
@@ -37,6 +40,7 @@ define <vscale x  1 x i16> @select_nxv1i16(i1 %cond, <vscale x  1 x i16> %a, <vs
 define <vscale x  8 x i16> @select_nxv8i16(i1 %cond, <vscale x  8 x i16> %a, <vscale x  8 x i16> %b) {
 ; CHECK-LABEL: select_nxv8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
@@ -48,6 +52,7 @@ define <vscale x  8 x i16> @select_nxv8i16(i1 %cond, <vscale x  8 x i16> %a, <vs
 define <vscale x  1 x i32> @select_nxv1i32(i1 %cond, <vscale x  1 x i32> %a, <vscale x  1 x i32> %b) {
 ; CHECK-LABEL: select_nxv1i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
@@ -59,6 +64,7 @@ define <vscale x  1 x i32> @select_nxv1i32(i1 %cond, <vscale x  1 x i32> %a, <vs
 define <vscale x  4 x i32> @select_nxv4i32(i1 %cond, <vscale x  4 x i32> %a, <vscale x  4 x i32> %b) {
 ; CHECK-LABEL: select_nxv4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
@@ -70,6 +76,7 @@ define <vscale x  4 x i32> @select_nxv4i32(i1 %cond, <vscale x  4 x i32> %a, <vs
 define <vscale x  1 x i64> @select_nxv1i64(i1 %cond, <vscale x  1 x i64> %a, <vscale x  1 x i64> %b) {
 ; CHECK-LABEL: select_nxv1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
@@ -81,6 +88,7 @@ define <vscale x  1 x i64> @select_nxv1i64(i1 %cond, <vscale x  1 x i64> %a, <vs
 define <vscale x  2 x i64> @select_nxv2i64(i1 %cond, <vscale x  2 x i64> %a, <vscale x  2 x i64> %b) {
 ; CHECK-LABEL: select_nxv2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
@@ -92,6 +100,7 @@ define <vscale x  2 x i64> @select_nxv2i64(i1 %cond, <vscale x  2 x i64> %a, <vs
 define <vscale x  8 x half> @select_nxv8f16(i1 %cond, <vscale x  8 x half> %a, <vscale x  8 x half> %b) {
 ; CHECK-LABEL: select_nxv8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
@@ -103,6 +112,7 @@ define <vscale x  8 x half> @select_nxv8f16(i1 %cond, <vscale x  8 x half> %a, <
 define <vscale x  4 x float> @select_nxv4f32(i1 %cond, <vscale x  4 x float> %a, <vscale x  4 x float> %b) {
 ; CHECK-LABEL: select_nxv4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
@@ -114,6 +124,7 @@ define <vscale x  4 x float> @select_nxv4f32(i1 %cond, <vscale x  4 x float> %a,
 define <vscale x  2 x double> @select_nxv2f64(i1 %cond, <vscale x  2 x double> %a, <vscale x  2 x double> %b) {
 ; CHECK-LABEL: select_nxv2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
@@ -125,6 +136,7 @@ define <vscale x  2 x double> @select_nxv2f64(i1 %cond, <vscale x  2 x double> %
 define <vscale x  16 x i1> @select_nxv16i1(i1 %cond, <vscale x  16 x i1> %a, <vscale x  16 x i1> %b) {
 ; CHECK-LABEL: select_nxv16i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p2.b, xzr, x8
 ; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
@@ -136,6 +148,7 @@ define <vscale x  16 x i1> @select_nxv16i1(i1 %cond, <vscale x  16 x i1> %a, <vs
 define <vscale x  8 x i1> @select_nxv8i1(i1 %cond, <vscale x  8 x i1> %a, <vscale x  8 x i1> %b) {
 ; CHECK-LABEL: select_nxv8i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p2.h, xzr, x8
 ; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
@@ -147,6 +160,7 @@ define <vscale x  8 x i1> @select_nxv8i1(i1 %cond, <vscale x  8 x i1> %a, <vscal
 define <vscale x  4 x i1> @select_nxv4i1(i1 %cond, <vscale x  4 x i1> %a, <vscale x  4 x i1> %b) {
 ; CHECK-LABEL: select_nxv4i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p2.s, xzr, x8
 ; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
@@ -158,6 +172,7 @@ define <vscale x  4 x i1> @select_nxv4i1(i1 %cond, <vscale x  4 x i1> %a, <vscal
 define <vscale x  2 x i1> @select_nxv2i1(i1 %cond, <vscale x  2 x i1> %a, <vscale x  2 x i1> %b) {
 ; CHECK-LABEL: select_nxv2i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p2.d, xzr, x8
 ; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
@@ -169,6 +184,7 @@ define <vscale x  2 x i1> @select_nxv2i1(i1 %cond, <vscale x  2 x i1> %a, <vscal
 define <vscale x  1 x i1> @select_nxv1i1(i1 %cond, <vscale x  1 x i1> %a, <vscale x  1 x i1> %b) {
 ; CHECK-LABEL: select_nxv1i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p2.d, xzr, x8
 ; CHECK-NEXT:    punpklo p2.h, p2.b

diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
index 9012f38a9742f7..76190eba870ded 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll
@@ -9,6 +9,7 @@ define i32 @promote_extract_2i32_idx(<vscale x 2 x i32> %a, i32 %idx) {
 ; CHECK-NEXT:    mov w8, w0
 ; CHECK-NEXT:    whilels p0.d, xzr, x8
 ; CHECK-NEXT:    lastb x0, p0, z0.d
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %ext = extractelement <vscale x 2 x i32> %a, i32 %idx
   ret i32 %ext

diff --git a/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll
index 7da0a25c357034..696b6c34ef0415 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll
@@ -7,10 +7,12 @@ define double @fadda_nxv8f64(double %init, <vscale x 8 x double> %a) {
 ; CHECK-LABEL: fadda_nxv8f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fadda d0, p0, d0, z1.d
 ; CHECK-NEXT:    fadda d0, p0, d0, z2.d
 ; CHECK-NEXT:    fadda d0, p0, d0, z3.d
 ; CHECK-NEXT:    fadda d0, p0, d0, z4.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fadd.nxv8f64(double %init, <vscale x 8 x double> %a)
   ret double %res
@@ -40,6 +42,7 @@ define double @fmaxv_nxv8f64(<vscale x 8 x double> %a) {
 ; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z2.d
 ; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fmax.nxv8f64(<vscale x 8 x double> %a)
   ret double %res
@@ -53,6 +56,7 @@ define half @fminv_nxv16f16(<vscale x 16 x half> %a) {
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fmin.nxv16f16(<vscale x 16 x half> %a)
   ret half %res
@@ -68,6 +72,7 @@ define double @fmaximumv_nxv8f64(<vscale x 8 x double> %a) {
 ; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z2.d
 ; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call double @llvm.vector.reduce.fmaximum.nxv8f64(<vscale x 8 x double> %a)
   ret double %res
@@ -81,6 +86,7 @@ define half @fminimumv_nxv16f16(<vscale x 16 x half> %a) {
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
   %res = call half @llvm.vector.reduce.fminimum.nxv16f16(<vscale x 16 x half> %a)
   ret half %res

diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
index c6bc11a1d681ff..dd7b15ef5ee6f4 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-reduce.ll
@@ -34,6 +34,7 @@ define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    orv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
   ret i32 %res
@@ -61,6 +62,7 @@ define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    eorv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
   ret i16 %res
@@ -86,6 +88,7 @@ define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
   ret i16 %res
@@ -98,6 +101,7 @@ define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
   ret i16 %res
@@ -112,6 +116,7 @@ define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
   ret i32 %res
@@ -126,6 +131,7 @@ define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    uminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
   ret i32 %res

diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index c9d73e3771aa05..ec94198a08ca7b 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 | FileCheck %s --check-prefix=CHECK
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefix=CHECK-FRAMELAYOUT
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=stack-frame-layout 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK-FRAMELAYOUT
 
 ; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64
 ; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
@@ -168,6 +168,7 @@ define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    .cfi_offset w29, -24
 ; CHECK-NEXT:    .cfi_offset b8, -32
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    ubfiz x8, x0, #2, #32
 ; CHECK-NEXT:    mov x9, sp
 ; CHECK-NEXT:    add x8, x8, #15
@@ -601,5 +602,3 @@ entry:
   ret i32 %x
 }
 declare void @other()
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FRAMELAYOUT: {{.*}}

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
index 63e20b55b10a40..478072d33d8c9b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll
@@ -10,8 +10,10 @@ define <4 x i8> @vls_sve_and_4xi8(<4 x i8> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi8:
@@ -34,8 +36,10 @@ define <8 x i8> @vls_sve_and_8xi8(<8 x i8> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_8xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI1_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_8xi8:
@@ -64,8 +68,10 @@ define <16 x i8> @vls_sve_and_16xi8(<16 x i8> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_16xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI2_0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_16xi8:
@@ -106,9 +112,13 @@ define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind {
 ; CHECK-LABEL: vls_sve_and_32xi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI3_0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_32xi8:
@@ -174,9 +184,11 @@ define <32 x i8> @vls_sve_and_32xi8(<32 x i8> %ap) nounwind {
 define <2 x i16> @vls_sve_and_2xi16(<2 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_2xi16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmov s1, wzr
+; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_2xi16:
@@ -195,8 +207,10 @@ define <4 x i16> @vls_sve_and_4xi16(<4 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI5_0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI5_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi16:
@@ -219,8 +233,10 @@ define <8 x i16> @vls_sve_and_8xi16(<8 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_8xi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI6_0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI6_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_8xi16:
@@ -249,9 +265,13 @@ define <16 x i16> @vls_sve_and_16xi16(<16 x i16> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_16xi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_16xi16:
@@ -293,7 +313,9 @@ define <2 x i32> @vls_sve_and_2xi32(<2 x i32> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_2xi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z1.s, #0, #-1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_2xi32:
@@ -312,8 +334,10 @@ define <4 x i32> @vls_sve_and_4xi32(<4 x i32> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI9_0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI9_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi32:
@@ -334,9 +358,13 @@ define <8 x i32> @vls_sve_and_8xi32(<8 x i32> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_8xi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI10_0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI10_0]
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_8xi32:
@@ -362,7 +390,9 @@ define <2 x i64> @vls_sve_and_2xi64(<2 x i64> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_2xi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z1.d, #0, #-1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_2xi64:
@@ -381,8 +411,12 @@ define <4 x i64> @vls_sve_and_4xi64(<4 x i64> %b) nounwind {
 ; CHECK-LABEL: vls_sve_and_4xi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    index z2.d, #0, #-1
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    and z0.d, z0.d, z2.d
 ; CHECK-NEXT:    and z1.d, z1.d, z2.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: vls_sve_and_4xi64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index b6e732a7b9b114..bd49db8a4c4149 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -12,10 +12,12 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i8> @ctlz_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: ctlz_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
 ; CHECK-NEXT:    sub z0.h, z0.h, #8 // =0x8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v4i8:
@@ -49,7 +51,9 @@ define <8 x i8> @ctlz_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: ctlz_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v8i8:
@@ -99,7 +103,9 @@ define <16 x i8> @ctlz_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: ctlz_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v16i8:
@@ -333,10 +339,12 @@ define void @ctlz_v32i8(ptr %a) {
 define <2 x i16> @ctlz_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: ctlz_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
 ; CHECK-NEXT:    sub z0.s, z0.s, #16 // =0x10
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v2i16:
@@ -361,7 +369,9 @@ define <4 x i16> @ctlz_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: ctlz_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v4i16:
@@ -395,7 +405,9 @@ define <8 x i16> @ctlz_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: ctlz_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v8i16:
@@ -534,7 +546,9 @@ define <2 x i32> @ctlz_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: ctlz_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v2i32:
@@ -557,7 +571,9 @@ define <4 x i32> @ctlz_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: ctlz_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v4i32:
@@ -630,7 +646,9 @@ define <1 x i64> @ctlz_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: ctlz_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v1i64:
@@ -651,7 +669,9 @@ define <2 x i64> @ctlz_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: ctlz_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctlz_v2i64:
@@ -712,9 +732,11 @@ define void @ctlz_v4i64(ptr %a) {
 define <4 x i8> @ctpop_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: ctpop_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v4i8:
@@ -785,7 +807,9 @@ define <8 x i8> @ctpop_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: ctpop_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v8i8:
@@ -908,7 +932,9 @@ define <16 x i8> @ctpop_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: ctpop_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v16i8:
@@ -1576,9 +1602,11 @@ define void @ctpop_v32i8(ptr %a) {
 define <2 x i16> @ctpop_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: ctpop_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
 ; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v2i16:
@@ -1622,7 +1650,9 @@ define <4 x i16> @ctpop_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: ctpop_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v4i16:
@@ -1693,7 +1723,9 @@ define <8 x i16> @ctpop_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: ctpop_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v8i16:
@@ -2050,7 +2082,9 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: ctpop_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v2i32:
@@ -2094,7 +2128,9 @@ define <4 x i32> @ctpop_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: ctpop_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v4i32:
@@ -2289,7 +2325,9 @@ define <1 x i64> @ctpop_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: ctpop_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cnt z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v1i64:
@@ -2321,7 +2359,9 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: ctpop_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cnt z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ctpop_v2i64:
@@ -2444,10 +2484,12 @@ define void @ctpop_v4i64(ptr %a) {
 define <4 x i8> @cttz_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: cttz_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr z0.h, z0.h, #0x100
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    orr z0.h, z0.h, #0x100
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v4i8:
@@ -2485,8 +2527,10 @@ define <8 x i8> @cttz_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: cttz_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v8i8:
@@ -2544,8 +2588,10 @@ define <16 x i8> @cttz_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: cttz_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT:    clz z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v16i8:
@@ -2829,10 +2875,12 @@ define void @cttz_v32i8(ptr %a) {
 define <2 x i16> @cttz_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: cttz_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr z0.s, z0.s, #0x10000
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    orr z0.s, z0.s, #0x10000
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v2i16:
@@ -2859,8 +2907,10 @@ define <4 x i16> @cttz_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: cttz_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v4i16:
@@ -2898,8 +2948,10 @@ define <8 x i16> @cttz_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: cttz_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    clz z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v8i16:
@@ -3064,8 +3116,10 @@ define <2 x i32> @cttz_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: cttz_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v2i32:
@@ -3090,8 +3144,10 @@ define <4 x i32> @cttz_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: cttz_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    clz z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v4i32:
@@ -3178,8 +3234,10 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: cttz_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v1i64:
@@ -3201,8 +3259,10 @@ define <2 x i64> @cttz_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: cttz_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT:    clz z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: cttz_v2i64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
index 4be262fd3f33fc..d3c446c9904b2b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll
@@ -28,6 +28,8 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %r
 ; CHECK-NEXT:    and z2.d, z2.d, z5.d
 ; CHECK-NEXT:    orr z1.d, z3.d, z1.d
 ; CHECK-NEXT:    orr z0.d, z0.d, z2.d
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fixed_bitselect_v8i32:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
index fd043e3afe4a86..9729a1d95cd916 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll
@@ -250,6 +250,8 @@ define void @build_vector_non_const_v4i1(i1 %a, i1 %b, i1 %c, i1 %d, ptr %out) {
 define void @build_vector_non_const_v2f64(double %a, double %b, ptr %out) {
 ; CHECK-LABEL: build_vector_non_const_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
@@ -271,6 +273,8 @@ define void @build_vector_non_const_v2f64(double %a, double %b, ptr %out) {
 define void @build_vector_non_const_v2f32(float %a, float %b, ptr %out) {
 ; CHECK-LABEL: build_vector_non_const_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $z1
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -293,6 +297,10 @@ define void @build_vector_non_const_v2f32(float %a, float %b, ptr %out) {
 define void @build_vector_non_const_v4f32(float %a, float %b, float %c, float %d, ptr %out)  {
 ; CHECK-LABEL: build_vector_non_const_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $z2
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    // kill: def $s3 killed $s3 def $z3
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $z1
 ; CHECK-NEXT:    zip1 z2.s, z2.s, z3.s
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z2.d
@@ -320,6 +328,10 @@ define void @build_vector_non_const_v4f32(float %a, float %b, float %c, float %d
 define void @build_vector_non_const_v4f64(double %a, double %b, double %c, double %d, ptr %out)  {
 ; CHECK-LABEL: build_vector_non_const_v4f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d3 killed $d3 def $z3
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z2.d, z2.d, z3.d
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
 ; CHECK-NEXT:    stp q0, q2, [x0]
@@ -345,6 +357,14 @@ define void @build_vector_non_const_v4f64(double %a, double %b, double %c, doubl
 define void @build_vector_non_const_v8f16(half %a, half %b, half %c, half %d, half %e, half %f, half %g, half %h, ptr %out) {
 ; CHECK-LABEL: build_vector_non_const_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h6 killed $h6 def $z6
+; CHECK-NEXT:    // kill: def $h4 killed $h4 def $z4
+; CHECK-NEXT:    // kill: def $h2 killed $h2 def $z2
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
+; CHECK-NEXT:    // kill: def $h7 killed $h7 def $z7
+; CHECK-NEXT:    // kill: def $h5 killed $h5 def $z5
+; CHECK-NEXT:    // kill: def $h3 killed $h3 def $z3
+; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
 ; CHECK-NEXT:    zip1 z6.h, z6.h, z7.h
 ; CHECK-NEXT:    zip1 z4.h, z4.h, z5.h
 ; CHECK-NEXT:    zip1 z2.h, z2.h, z3.h

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
index 5ec63396a2f5f1..619840fc6afb28 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -12,6 +12,8 @@ target triple = "aarch64-unknown-linux-gnu"
 define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2)  {
 ; CHECK-LABEL: concat_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.h, z1.h[3]
 ; CHECK-NEXT:    mov z3.h, z1.h[2]
 ; CHECK-NEXT:    mov z4.h, z1.h[1]
@@ -25,6 +27,7 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2)  {
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v8i8:
@@ -58,8 +61,11 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2)  {
 define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2)  {
 ; CHECK-LABEL: concat_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v16i8:
@@ -134,11 +140,14 @@ define void @concat_v64i8(ptr %a, ptr %b, ptr %c) {
 define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2)  {
 ; CHECK-LABEL: concat_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.s, z1.s[1]
 ; CHECK-NEXT:    mov z3.s, z0.s[1]
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4i16:
@@ -163,8 +172,11 @@ define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2)  {
 define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2)  {
 ; CHECK-LABEL: concat_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v8i16:
@@ -233,7 +245,10 @@ define void @concat_v32i16(ptr %a, ptr %b, ptr %c) {
 define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2)  {
 ; CHECK-LABEL: concat_v2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2i32:
@@ -255,8 +270,11 @@ define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2)  {
 define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2)  {
 ; CHECK-LABEL: concat_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4i32:
@@ -322,8 +340,11 @@ define void @concat_v16i32(ptr %a, ptr %b, ptr %c) {
 define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2)  {
 ; CHECK-LABEL: concat_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2i64:
@@ -385,27 +406,13 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) {
 ;
 
 define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2)  {
-; SVE2-LABEL: concat_v4f16:
-; SVE2:       // %bb.0:
-; SVE2-NEXT:    cnth x8
-; SVE2-NEXT:    adrp x9, .LCPI15_0
-; SVE2-NEXT:    adrp x10, .LCPI15_1
-; SVE2-NEXT:    mov z2.h, w8
-; SVE2-NEXT:    ldr q3, [x9, :lo12:.LCPI15_0]
-; SVE2-NEXT:    ldr q4, [x10, :lo12:.LCPI15_1]
-; SVE2-NEXT:    ptrue p0.h, vl8
-; SVE2-NEXT:    mad z2.h, p0/m, z3.h, z4.h
-; SVE2-NEXT:    tbl z0.h, { z0.h, z1.h }, z2.h
-; SVE2-NEXT:    ret
-;
-; SME-LABEL: concat_v4f16:
-; SME:       // %bb.0:
-; SME-NEXT:    mov z2.h, z1.h[1]
-; SME-NEXT:    mov z3.h, z0.h[1]
-; SME-NEXT:    zip1 z1.h, z1.h, z2.h
-; SME-NEXT:    zip1 z0.h, z0.h, z3.h
-; SME-NEXT:    zip1 z0.s, z0.s, z1.s
-; SME-NEXT:    ret
+; CHECK-LABEL: concat_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4f16:
 ; NONEON-NOSVE:       // %bb.0:
@@ -425,8 +432,11 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2)  {
 define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2)  {
 ; CHECK-LABEL: concat_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v8f16:
@@ -495,7 +505,10 @@ define void @concat_v32f16(ptr %a, ptr %b, ptr %c) {
 define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2)  {
 ; CHECK-LABEL: concat_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2f32:
@@ -517,8 +530,11 @@ define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2)  {
 define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2)  {
 ; CHECK-LABEL: concat_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v4f32:
@@ -584,8 +600,11 @@ define void @concat_v16f32(ptr %a, ptr %b, ptr %c) {
 define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2)  {
 ; CHECK-LABEL: concat_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: concat_v2f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 8d009867232bf2..7d6336a43a4fd1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -10,6 +10,7 @@ define <8 x i16> @load_zext_v8i8i16(ptr %ap)  {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v8i8i16:
@@ -47,6 +48,7 @@ define <4 x i32> @load_zext_v4i16i32(ptr %ap)  {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v4i16i32:
@@ -74,6 +76,7 @@ define <2 x i64> @load_zext_v2i32i64(ptr %ap) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v2i32i64:
@@ -137,6 +140,10 @@ define <16 x i32> @load_sext_v16i8i32(ptr %ap)  {
 ; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, x8]
 ; CHECK-NEXT:    ld1sb { z2.s }, p0/z, [x0, x9]
 ; CHECK-NEXT:    ld1sb { z3.s }, p0/z, [x0, x10]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $z3
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_sext_v16i8i32:
@@ -223,6 +230,8 @@ define <8 x i32> @load_sext_v8i16i32(ptr %ap)  {
 ; CHECK-NEXT:    mov x8, #4 // =0x4
 ; CHECK-NEXT:    ld1sh { z1.s }, p0/z, [x0, x8, lsl #1]
 ; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_sext_v8i16i32:
@@ -389,6 +398,14 @@ define <16 x i64> @load_zext_v16i16i64(ptr %ap)  {
 ; CHECK-NEXT:    ld1h { z5.d }, p0/z, [x0, x9, lsl #1]
 ; CHECK-NEXT:    ld1h { z6.d }, p0/z, [x0, x8, lsl #1]
 ; CHECK-NEXT:    ld1h { z7.d }, p0/z, [x0, x10, lsl #1]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $z2
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT:    // kill: def $q4 killed $q4 killed $z4
+; CHECK-NEXT:    // kill: def $q5 killed $q5 killed $z5
+; CHECK-NEXT:    // kill: def $q6 killed $q6 killed $z6
+; CHECK-NEXT:    // kill: def $q7 killed $q7 killed $z7
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_zext_v16i16i64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index 6adab37b84e35b..35dd827bbabc55 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -10,6 +10,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) {
 ; CHECK-LABEL: extract_subvector_v8i1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.b, z0.b[7]
 ; CHECK-NEXT:    mov z2.b, z0.b[6]
 ; CHECK-NEXT:    mov z3.b, z0.b[5]
@@ -17,6 +18,7 @@ define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) {
 ; CHECK-NEXT:    zip1 z1.h, z2.h, z1.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8i1:
@@ -43,6 +45,7 @@ define <4 x i1> @extract_subvector_v8i1(<8 x i1> %op) {
 define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: extract_subvector_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.b, z0.b[7]
 ; CHECK-NEXT:    mov z2.b, z0.b[6]
 ; CHECK-NEXT:    mov z3.b, z0.b[5]
@@ -50,6 +53,7 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 ; CHECK-NEXT:    zip1 z1.h, z2.h, z1.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8i8:
@@ -74,7 +78,9 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: extract_subvector_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v16i8:
@@ -111,8 +117,10 @@ define void @extract_subvector_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: extract_subvector_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4i16:
@@ -130,7 +138,9 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
 define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: extract_subvector_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8i16:
@@ -167,7 +177,9 @@ define void @extract_subvector_v16i16(ptr %a, ptr %b) {
 define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: extract_subvector_v2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2i32:
@@ -186,7 +198,9 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
 define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: extract_subvector_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4i32:
@@ -223,7 +237,9 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: extract_subvector_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2i64:
@@ -260,7 +276,9 @@ define void @extract_subvector_v4i64(ptr %a, ptr %b) {
 define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: extract_subvector_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4f16:
@@ -279,7 +297,9 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
 define <4 x half> @extract_subvector_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: extract_subvector_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v8f16:
@@ -316,7 +336,9 @@ define void @extract_subvector_v16f16(ptr %a, ptr %b) {
 define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: extract_subvector_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2f32:
@@ -335,7 +357,9 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
 define <2 x float> @extract_subvector_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: extract_subvector_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v4f32:
@@ -372,7 +396,9 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) {
 define <1 x double> @extract_subvector_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: extract_subvector_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extract_subvector_v2f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
index 71a0052f22164e..cf308e6c4395ff 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
@@ -12,7 +12,9 @@ target triple = "aarch64-unknown-linux-gnu"
 define half @extractelement_v2f16(<2 x half> %op1) {
 ; CHECK-LABEL: extractelement_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[1]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v2f16:
@@ -30,7 +32,9 @@ define half @extractelement_v2f16(<2 x half> %op1) {
 define half @extractelement_v4f16(<4 x half> %op1) {
 ; CHECK-LABEL: extractelement_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[3]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v4f16:
@@ -48,7 +52,9 @@ define half @extractelement_v4f16(<4 x half> %op1) {
 define half @extractelement_v8f16(<8 x half> %op1) {
 ; CHECK-LABEL: extractelement_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v8f16:
@@ -67,6 +73,7 @@ define half @extractelement_v16f16(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v16f16:
@@ -85,7 +92,9 @@ define half @extractelement_v16f16(ptr %a) {
 define float @extractelement_v2f32(<2 x float> %op1) {
 ; CHECK-LABEL: extractelement_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v2f32:
@@ -103,7 +112,9 @@ define float @extractelement_v2f32(<2 x float> %op1) {
 define float @extractelement_v4f32(<4 x float> %op1) {
 ; CHECK-LABEL: extractelement_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v4f32:
@@ -122,6 +133,7 @@ define float @extractelement_v8f32(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v8f32:
@@ -151,7 +163,9 @@ define double @extractelement_v1f64(<1 x double> %op1) {
 define double @extractelement_v2f64(<2 x double> %op1) {
 ; CHECK-LABEL: extractelement_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v2f64:
@@ -170,6 +184,7 @@ define double @extractelement_v4f64(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0, #16]
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: extractelement_v4f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index 72982d175901d8..2282e74af5d006 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -847,10 +847,10 @@ define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
 ; SVE2-NEXT:    fcvt z0.s, p0/m, z0.d
 ; SVE2-NEXT:    fcvt z1.s, p0/m, z1.d
 ; SVE2-NEXT:    ptrue p0.s, vl2
-; SVE2-NEXT:    uzp1 z2.s, z0.s, z0.s
-; SVE2-NEXT:    uzp1 z1.s, z1.s, z1.s
-; SVE2-NEXT:    splice z0.s, p0, { z1.s, z2.s }
+; SVE2-NEXT:    uzp1 z3.s, z0.s, z0.s
+; SVE2-NEXT:    uzp1 z2.s, z1.s, z1.s
 ; SVE2-NEXT:    mov z1.s, #0x7fffffff
+; SVE2-NEXT:    splice z0.s, p0, { z2.s, z3.s }
 ; SVE2-NEXT:    ldr q2, [x0]
 ; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
 ; SVE2-NEXT:    str q2, [x0]
@@ -1242,10 +1242,10 @@ define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
 ; SVE2-NEXT:    fcvt z0.h, p0/m, z0.s
 ; SVE2-NEXT:    fcvt z1.h, p0/m, z1.s
 ; SVE2-NEXT:    ptrue p0.h, vl4
-; SVE2-NEXT:    uzp1 z2.h, z0.h, z0.h
-; SVE2-NEXT:    uzp1 z1.h, z1.h, z1.h
-; SVE2-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; SVE2-NEXT:    uzp1 z3.h, z0.h, z0.h
+; SVE2-NEXT:    uzp1 z2.h, z1.h, z1.h
 ; SVE2-NEXT:    mov z1.h, #32767 // =0x7fff
+; SVE2-NEXT:    splice z0.h, p0, { z2.h, z3.h }
 ; SVE2-NEXT:    ldr q2, [x0]
 ; SVE2-NEXT:    bsl z2.d, z2.d, z0.d, z1.d
 ; SVE2-NEXT:    str q2, [x0]

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
index b3f8f9d0764947..f2c882c370eabe 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
@@ -13,7 +13,10 @@ define <2 x half> @fadd_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fadd_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v2f16:
@@ -60,7 +63,10 @@ define <4 x half> @fadd_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fadd_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v4f16:
@@ -107,7 +113,10 @@ define <8 x half> @fadd_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fadd_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v8f16:
@@ -324,7 +333,10 @@ define <2 x float> @fadd_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fadd_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v2f32:
@@ -349,7 +361,10 @@ define <4 x float> @fadd_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fadd_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v4f32:
@@ -434,7 +449,10 @@ define <2 x double> @fadd_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fadd_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fadd_v2f64:
@@ -505,7 +523,10 @@ define <2 x half> @fdiv_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fdiv_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v2f16:
@@ -552,7 +573,10 @@ define <4 x half> @fdiv_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fdiv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v4f16:
@@ -599,7 +623,10 @@ define <8 x half> @fdiv_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fdiv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v8f16:
@@ -816,7 +843,10 @@ define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fdiv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v2f32:
@@ -841,7 +871,10 @@ define <4 x float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fdiv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v4f32:
@@ -926,7 +959,10 @@ define <2 x double> @fdiv_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fdiv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fdiv_v2f64:
@@ -997,7 +1033,11 @@ define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3)
 ; CHECK-LABEL: fma_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f16:
@@ -1053,7 +1093,11 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
 ; CHECK-LABEL: fma_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v4f16:
@@ -1109,7 +1153,11 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
 ; CHECK-LABEL: fma_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v8f16:
@@ -1380,7 +1428,11 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o
 ; CHECK-LABEL: fma_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f32:
@@ -1407,7 +1459,11 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o
 ; CHECK-LABEL: fma_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v4f32:
@@ -1504,7 +1560,11 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double
 ; CHECK-LABEL: fma_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f64:
@@ -1584,7 +1644,10 @@ define <2 x half> @fmul_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fmul_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v2f16:
@@ -1631,7 +1694,10 @@ define <4 x half> @fmul_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmul_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v4f16:
@@ -1678,7 +1744,10 @@ define <8 x half> @fmul_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmul_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v8f16:
@@ -1895,7 +1964,10 @@ define <2 x float> @fmul_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmul_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v2f32:
@@ -1920,7 +1992,10 @@ define <4 x float> @fmul_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmul_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v4f32:
@@ -2005,7 +2080,10 @@ define <2 x double> @fmul_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmul_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmul_v2f64:
@@ -2076,7 +2154,9 @@ define <2 x half> @fneg_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fneg_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v2f16:
@@ -2114,7 +2194,9 @@ define <4 x half> @fneg_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fneg_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v4f16:
@@ -2152,7 +2234,9 @@ define <8 x half> @fneg_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fneg_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v8f16:
@@ -2315,7 +2399,9 @@ define <2 x float> @fneg_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fneg_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v2f32:
@@ -2338,7 +2424,9 @@ define <4 x float> @fneg_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fneg_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v4f32:
@@ -2411,7 +2499,9 @@ define <2 x double> @fneg_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fneg_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fneg z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fneg_v2f64:
@@ -2473,7 +2563,9 @@ define <2 x half> @fsqrt_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fsqrt_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v2f16:
@@ -2511,7 +2603,9 @@ define <4 x half> @fsqrt_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fsqrt_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v4f16:
@@ -2549,7 +2643,9 @@ define <8 x half> @fsqrt_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fsqrt_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v8f16:
@@ -2712,7 +2808,9 @@ define <2 x float> @fsqrt_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fsqrt_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v2f32:
@@ -2735,7 +2833,9 @@ define <4 x float> @fsqrt_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fsqrt_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v4f32:
@@ -2808,7 +2908,9 @@ define <2 x double> @fsqrt_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fsqrt_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fsqrt z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsqrt_v2f64:
@@ -2870,7 +2972,10 @@ define <2 x half> @fsub_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fsub_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v2f16:
@@ -2917,7 +3022,10 @@ define <4 x half> @fsub_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fsub_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v4f16:
@@ -2964,7 +3072,10 @@ define <8 x half> @fsub_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fsub_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v8f16:
@@ -3181,7 +3292,10 @@ define <2 x float> @fsub_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fsub_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v2f32:
@@ -3206,7 +3320,10 @@ define <4 x float> @fsub_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fsub_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v4f32:
@@ -3291,7 +3408,10 @@ define <2 x double> @fsub_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fsub_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fsub_v2f64:
@@ -3362,7 +3482,9 @@ define <2 x half> @fabs_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fabs_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v2f16:
@@ -3400,7 +3522,9 @@ define <4 x half> @fabs_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fabs_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v4f16:
@@ -3438,7 +3562,9 @@ define <8 x half> @fabs_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fabs_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v8f16:
@@ -3601,7 +3727,9 @@ define <2 x float> @fabs_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fabs_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v2f32:
@@ -3624,7 +3752,9 @@ define <4 x float> @fabs_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fabs_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v4f32:
@@ -3697,7 +3827,9 @@ define <2 x double> @fabs_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fabs_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fabs z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fabs_v2f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
index 3962f288fa720b..200ffb60a7928a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -13,9 +13,12 @@ define <2 x i16> @fcmp_oeq_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v2f16:
@@ -48,8 +51,11 @@ define <4 x i16> @fcmp_oeq_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v4f16:
@@ -97,8 +103,11 @@ define <8 x i16> @fcmp_oeq_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v8f16:
@@ -318,8 +327,11 @@ define <2 x i32> @fcmp_oeq_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v2f32:
@@ -346,8 +358,11 @@ define <4 x i32> @fcmp_oeq_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v4f32:
@@ -443,6 +458,7 @@ define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) {
 ; CHECK-NEXT:    fcmp d0, d1
 ; CHECK-NEXT:    csetm x8, eq
 ; CHECK-NEXT:    mov z0.d, x8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v1f64:
@@ -464,8 +480,11 @@ define <2 x i64> @fcmp_oeq_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fcmp_oeq_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcmeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcmp_oeq_v2f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index 6e0d169779e0c4..c96189b9602685 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -12,8 +12,9 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @fcvt_v2f16_to_v2f32(<2 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v2f16_to_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
 ; CHECK-NEXT:    str d0, [x0]
 ; CHECK-NEXT:    ret
@@ -39,8 +40,9 @@ define void @fcvt_v2f16_to_v2f32(<2 x half> %a, ptr %b) {
 define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v4f16_to_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
 ; CHECK-NEXT:    str q0, [x0]
 ; CHECK-NEXT:    ret
@@ -72,9 +74,10 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
 define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v8f16_to_v8f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
@@ -119,19 +122,21 @@ define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
 define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v16f16_to_v16f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
-; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    fcvt z2.s, p0/m, z2.h
 ; CHECK-NEXT:    fcvt z3.s, p0/m, z3.h
 ; CHECK-NEXT:    fcvt z1.s, p0/m, z1.h
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.h
-; CHECK-NEXT:    stp q2, q1, [x0, #32]
 ; CHECK-NEXT:    stp q3, q0, [x0]
+; CHECK-NEXT:    stp q2, q1, [x0, #32]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvt_v16f16_to_v16f32:
@@ -1129,6 +1134,7 @@ define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) {
 ; CHECK-LABEL: fcvt_v1f64_v1f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
 ; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -1147,6 +1153,7 @@ define void @fcvt_v2f64_v2f32(<2 x double> %op1, ptr %b) {
 ; CHECK-LABEL: fcvt_v2f64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvt z0.s, p0/m, z0.d
 ; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
index 88d8ac0fbbd663..680cb4fb0a7910 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
@@ -12,7 +12,11 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
 ; CHECK-LABEL: fma_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v4f16:
@@ -80,7 +84,11 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
 ; CHECK-LABEL: fma_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v8f16:
@@ -443,7 +451,11 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %o
 ; CHECK-LABEL: fma_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f32:
@@ -471,7 +483,11 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %o
 ; CHECK-LABEL: fma_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v4f32:
@@ -590,7 +606,11 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double
 ; CHECK-LABEL: fma_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fma_v2f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
index 61c25145df31ad..84aea185917fae 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
@@ -13,7 +13,10 @@ define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmaxnm_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v4f16:
@@ -60,7 +63,10 @@ define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmaxnm_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v8f16:
@@ -277,7 +283,10 @@ define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmaxnm_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v2f32:
@@ -302,7 +311,10 @@ define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmaxnm_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v4f32:
@@ -406,7 +418,10 @@ define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmaxnm_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxnm_v2f64:
@@ -477,7 +492,10 @@ define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fminnm_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v4f16:
@@ -524,7 +542,10 @@ define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fminnm_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v8f16:
@@ -741,7 +762,10 @@ define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fminnm_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v2f32:
@@ -766,7 +790,10 @@ define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fminnm_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v4f32:
@@ -870,7 +897,10 @@ define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fminnm_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminnm_v2f64:
@@ -941,7 +971,10 @@ define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmax_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v4f16:
@@ -988,7 +1021,10 @@ define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmax_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v8f16:
@@ -1205,7 +1241,10 @@ define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmax_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v2f32:
@@ -1230,7 +1269,10 @@ define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmax_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v4f32:
@@ -1334,7 +1376,10 @@ define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmax_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmax_v2f64:
@@ -1405,7 +1450,10 @@ define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmin_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v4f16:
@@ -1452,7 +1500,10 @@ define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmin_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v8f16:
@@ -1669,7 +1720,10 @@ define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmin_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v2f32:
@@ -1694,7 +1748,10 @@ define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmin_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v4f32:
@@ -1798,7 +1855,10 @@ define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmin_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmin_v2f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
index 0c5bfe8be1dab7..f081d4ac65b279 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce-fa64.ll
@@ -10,11 +10,15 @@ define half @fadda_v4f16(half %start, <4 x half> %a) {
 ; FA64-LABEL: fadda_v4f16:
 ; FA64:       // %bb.0:
 ; FA64-NEXT:    ptrue p0.h, vl4
+; FA64-NEXT:    // kill: def $h0 killed $h0 def $z0
+; FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; FA64-NEXT:    fadda h0, p0, h0, z1.h
+; FA64-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; FA64-NEXT:    ret
 ;
 ; NO-FA64-LABEL: fadda_v4f16:
 ; NO-FA64:       // %bb.0:
+; NO-FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; NO-FA64-NEXT:    fadd h0, h0, h1
 ; NO-FA64-NEXT:    mov z2.h, z1.h[1]
 ; NO-FA64-NEXT:    fadd h0, h0, h2

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
index 8a53607b8923b6..4eaaee7ce5055d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
@@ -11,6 +11,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define half @fadda_v4f16(half %start, <4 x half> %a) {
 ; CHECK-LABEL: fadda_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    mov z2.h, z1.h[1]
 ; CHECK-NEXT:    fadd h0, h0, h2
@@ -54,6 +55,7 @@ define half @fadda_v4f16(half %start, <4 x half> %a) {
 define half @fadda_v8f16(half %start, <8 x half> %a) {
 ; CHECK-LABEL: fadda_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    mov z2.h, z1.h[1]
 ; CHECK-NEXT:    fadd h0, h0, h2
@@ -256,6 +258,7 @@ define half @fadda_v16f16(half %start, ptr %a) {
 define float @fadda_v2f32(float %start, <2 x float> %a) {
 ; CHECK-LABEL: fadda_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    mov z1.s, z1.s[1]
 ; CHECK-NEXT:    fadd s0, s0, s1
@@ -278,6 +281,7 @@ define float @fadda_v2f32(float %start, <2 x float> %a) {
 define float @fadda_v4f32(float %start, <4 x float> %a) {
 ; CHECK-LABEL: fadda_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    mov z2.s, z1.s[1]
 ; CHECK-NEXT:    fadd s0, s0, s2
@@ -368,6 +372,7 @@ define double @fadda_v1f64(double %start, <1 x double> %a) {
 define double @fadda_v2f64(double %start, <2 x double> %a) {
 ; CHECK-LABEL: fadda_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    fadd d0, d0, d1
 ; CHECK-NEXT:    mov z1.d, z1.d[1]
 ; CHECK-NEXT:    fadd d0, d0, d1
@@ -427,6 +432,7 @@ define half @faddv_v4f16(half %start, <4 x half> %a) {
 ; CHECK-LABEL: faddv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    faddv h1, p0, z1.h
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    ret
@@ -466,6 +472,7 @@ define half @faddv_v8f16(half %start, <8 x half> %a) {
 ; CHECK-LABEL: faddv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    faddv h1, p0, z1.h
 ; CHECK-NEXT:    fadd h0, h0, h1
 ; CHECK-NEXT:    ret
@@ -626,6 +633,7 @@ define float @faddv_v2f32(float %start, <2 x float> %a) {
 ; CHECK-LABEL: faddv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    faddv s1, p0, z1.s
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
@@ -648,6 +656,7 @@ define float @faddv_v4f32(float %start, <4 x float> %a) {
 ; CHECK-LABEL: faddv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    faddv s1, p0, z1.s
 ; CHECK-NEXT:    fadd s0, s0, s1
 ; CHECK-NEXT:    ret
@@ -719,6 +728,7 @@ define double @faddv_v2f64(double %start, <2 x double> %a) {
 ; CHECK-LABEL: faddv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    faddv d1, p0, z1.d
 ; CHECK-NEXT:    fadd d0, d0, d1
 ; CHECK-NEXT:    ret
@@ -770,7 +780,9 @@ define half @fmaxv_v4f16(<4 x half> %a) {
 ; CHECK-LABEL: fmaxv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v4f16:
@@ -804,7 +816,9 @@ define half @fmaxv_v8f16(<8 x half> %a) {
 ; CHECK-LABEL: fmaxv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v8f16:
@@ -860,6 +874,7 @@ define half @fmaxv_v16f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fmaxnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v16f16:
@@ -954,7 +969,9 @@ define float @fmaxv_v2f32(<2 x float> %a) {
 ; CHECK-LABEL: fmaxv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v2f32:
@@ -974,7 +991,9 @@ define float @fmaxv_v4f32(<4 x float> %a) {
 ; CHECK-LABEL: fmaxv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v4f32:
@@ -999,6 +1018,7 @@ define float @fmaxv_v8f32(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fmaxnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v8f32:
@@ -1040,7 +1060,9 @@ define double @fmaxv_v2f64(<2 x double> %a) {
 ; CHECK-LABEL: fmaxv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v2f64:
@@ -1061,6 +1083,7 @@ define double @fmaxv_v4f64(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fmaxnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaxv_v4f64:
@@ -1087,7 +1110,9 @@ define half @fminv_v4f16(<4 x half> %a) {
 ; CHECK-LABEL: fminv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v4f16:
@@ -1121,7 +1146,9 @@ define half @fminv_v8f16(<8 x half> %a) {
 ; CHECK-LABEL: fminv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v8f16:
@@ -1177,6 +1204,7 @@ define half @fminv_v16f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fminnmv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v16f16:
@@ -1271,7 +1299,9 @@ define float @fminv_v2f32(<2 x float> %a) {
 ; CHECK-LABEL: fminv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v2f32:
@@ -1291,7 +1321,9 @@ define float @fminv_v4f32(<4 x float> %a) {
 ; CHECK-LABEL: fminv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v4f32:
@@ -1316,6 +1348,7 @@ define float @fminv_v8f32(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fminnmv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v8f32:
@@ -1357,7 +1390,9 @@ define double @fminv_v2f64(<2 x double> %a) {
 ; CHECK-LABEL: fminv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v2f64:
@@ -1378,6 +1413,7 @@ define double @fminv_v4f64(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fminnmv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminv_v4f64:
@@ -1404,7 +1440,9 @@ define half @fmaximumv_v4f16(<4 x half> %a) {
 ; CHECK-LABEL: fmaximumv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v4f16:
@@ -1438,7 +1476,9 @@ define half @fmaximumv_v8f16(<8 x half> %a) {
 ; CHECK-LABEL: fmaximumv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v8f16:
@@ -1494,6 +1534,7 @@ define half @fmaximumv_v16f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fmaxv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v16f16:
@@ -1588,7 +1629,9 @@ define float @fmaximumv_v2f32(<2 x float> %a) {
 ; CHECK-LABEL: fmaximumv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v2f32:
@@ -1608,7 +1651,9 @@ define float @fmaximumv_v4f32(<4 x float> %a) {
 ; CHECK-LABEL: fmaximumv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v4f32:
@@ -1633,6 +1678,7 @@ define float @fmaximumv_v8f32(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fmaxv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v8f32:
@@ -1674,7 +1720,9 @@ define double @fmaximumv_v2f64(<2 x double> %a) {
 ; CHECK-LABEL: fmaximumv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v2f64:
@@ -1695,6 +1743,7 @@ define double @fmaximumv_v4f64(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fmaxv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fmaximumv_v4f64:
@@ -1721,7 +1770,9 @@ define half @fminimumv_v4f16(<4 x half> %a) {
 ; CHECK-LABEL: fminimumv_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v4f16:
@@ -1755,7 +1806,9 @@ define half @fminimumv_v8f16(<8 x half> %a) {
 ; CHECK-LABEL: fminimumv_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v8f16:
@@ -1811,6 +1864,7 @@ define half @fminimumv_v16f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    fminv h0, p0, z0.h
+; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v16f16:
@@ -1905,7 +1959,9 @@ define float @fminimumv_v2f32(<2 x float> %a) {
 ; CHECK-LABEL: fminimumv_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fminv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v2f32:
@@ -1925,7 +1981,9 @@ define float @fminimumv_v4f32(<4 x float> %a) {
 ; CHECK-LABEL: fminimumv_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v4f32:
@@ -1950,6 +2008,7 @@ define float @fminimumv_v8f32(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    fminv s0, p0, z0.s
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v8f32:
@@ -1991,7 +2050,9 @@ define double @fminimumv_v2f64(<2 x double> %a) {
 ; CHECK-LABEL: fminimumv_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fminv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v2f64:
@@ -2012,6 +2073,7 @@ define double @fminimumv_v4f64(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    fminv d0, p0, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fminimumv_v4f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
index 70e4b02e9de6cd..03bc39a6ef3ee1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
@@ -13,7 +13,9 @@ define <2 x half> @frintp_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintp_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v2f16:
@@ -51,7 +53,9 @@ define <4 x half> @frintp_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintp_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v4f16:
@@ -89,7 +93,9 @@ define <8 x half> @frintp_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintp_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintp z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v8f16:
@@ -252,7 +258,9 @@ define <2 x float> @frintp_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintp_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v2f32:
@@ -275,7 +283,9 @@ define <4 x float> @frintp_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintp_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintp z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v4f32:
@@ -367,7 +377,9 @@ define <2 x double> @frintp_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintp_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintp z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintp_v2f64:
@@ -429,7 +441,9 @@ define <2 x half> @frintm_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintm_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v2f16:
@@ -467,7 +481,9 @@ define <4 x half> @frintm_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintm_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v4f16:
@@ -505,7 +521,9 @@ define <8 x half> @frintm_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintm_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintm z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v8f16:
@@ -668,7 +686,9 @@ define <2 x float> @frintm_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintm_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v2f32:
@@ -691,7 +711,9 @@ define <4 x float> @frintm_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintm_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintm z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v4f32:
@@ -783,7 +805,9 @@ define <2 x double> @frintm_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintm_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintm z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintm_v2f64:
@@ -845,7 +869,9 @@ define <2 x half> @frinti_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frinti_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v2f16:
@@ -883,7 +909,9 @@ define <4 x half> @frinti_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frinti_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v4f16:
@@ -921,7 +949,9 @@ define <8 x half> @frinti_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frinti_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinti z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v8f16:
@@ -1084,7 +1114,9 @@ define <2 x float> @frinti_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frinti_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v2f32:
@@ -1107,7 +1139,9 @@ define <4 x float> @frinti_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frinti_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinti z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v4f32:
@@ -1199,7 +1233,9 @@ define <2 x double> @frinti_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frinti_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinti z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinti_v2f64:
@@ -1261,7 +1297,9 @@ define <2 x half> @frintx_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintx_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v2f16:
@@ -1299,7 +1337,9 @@ define <4 x half> @frintx_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintx_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v4f16:
@@ -1337,7 +1377,9 @@ define <8 x half> @frintx_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintx_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintx z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v8f16:
@@ -1500,7 +1542,9 @@ define <2 x float> @frintx_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintx_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v2f32:
@@ -1523,7 +1567,9 @@ define <4 x float> @frintx_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintx_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintx z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v4f32:
@@ -1615,7 +1661,9 @@ define <2 x double> @frintx_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintx_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintx z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintx_v2f64:
@@ -1677,7 +1725,9 @@ define <2 x half> @frinta_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frinta_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v2f16:
@@ -1715,7 +1765,9 @@ define <4 x half> @frinta_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frinta_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v4f16:
@@ -1753,7 +1805,9 @@ define <8 x half> @frinta_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frinta_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinta z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v8f16:
@@ -1916,7 +1970,9 @@ define <2 x float> @frinta_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frinta_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v2f32:
@@ -1939,7 +1995,9 @@ define <4 x float> @frinta_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frinta_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinta z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v4f32:
@@ -2031,7 +2089,9 @@ define <2 x double> @frinta_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frinta_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frinta z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frinta_v2f64:
@@ -2093,7 +2153,9 @@ define <2 x half> @frintn_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintn_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v2f16:
@@ -2131,7 +2193,9 @@ define <4 x half> @frintn_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintn_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v4f16:
@@ -2169,7 +2233,9 @@ define <8 x half> @frintn_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintn_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintn z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v8f16:
@@ -2332,7 +2398,9 @@ define <2 x float> @frintn_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintn_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v2f32:
@@ -2355,7 +2423,9 @@ define <4 x float> @frintn_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintn_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintn z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v4f32:
@@ -2447,7 +2517,9 @@ define <2 x double> @frintn_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintn_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintn z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintn_v2f64:
@@ -2509,7 +2581,9 @@ define <2 x half> @frintz_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: frintz_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v2f16:
@@ -2547,7 +2621,9 @@ define <4 x half> @frintz_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: frintz_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v4f16:
@@ -2585,7 +2661,9 @@ define <8 x half> @frintz_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: frintz_v8f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintz z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v8f16:
@@ -2748,7 +2826,9 @@ define <2 x float> @frintz_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: frintz_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v2f32:
@@ -2771,7 +2851,9 @@ define <4 x float> @frintz_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: frintz_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintz z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v4f32:
@@ -2863,7 +2945,9 @@ define <2 x double> @frintz_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: frintz_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    frintz z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: frintz_v2f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index b2d5bf1ed8b1fa..bcc446d9d1a416 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -10,9 +10,12 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f16:
@@ -49,9 +52,12 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f16:
@@ -88,9 +94,12 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8f16:
@@ -245,9 +254,12 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f32:
@@ -274,9 +286,12 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f32:
@@ -390,11 +405,15 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask
 define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask) {
 ; CHECK-LABEL: select_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f64:
@@ -418,6 +437,7 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask
 define void @select_v4f64(ptr %a, ptr %b, i1 %mask) {
 ; CHECK-LABEL: select_v4f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index 8b0ecfe64daba9..11fee267660c03 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -12,7 +12,9 @@ define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) {
 ; CHECK-LABEL: fcvtzu_v4f16_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i16:
@@ -194,9 +196,11 @@ define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f16_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i32:
@@ -220,9 +224,11 @@ define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) {
 define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) {
 ; CHECK-LABEL: fcvtzu_v4f16_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f16_v4i32:
@@ -433,12 +439,14 @@ define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) {
 define <2 x i64> @fcvtzu_v2f16_v2i64(<2 x half> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f16_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.h, z0.h[1]
 ; CHECK-NEXT:    fcvtzu x8, h0
 ; CHECK-NEXT:    fcvtzu x9, h1
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    fmov d1, x9
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f16_v2i64:
@@ -749,7 +757,9 @@ define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f32_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i16:
@@ -771,8 +781,10 @@ define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v4f32_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i16:
@@ -807,6 +819,7 @@ define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v8f32_v8i16:
@@ -929,7 +942,9 @@ define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f32_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i32:
@@ -951,7 +966,9 @@ define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v4f32_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f32_v4i32:
@@ -1021,9 +1038,11 @@ define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v1f32_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v1f32_v1i64:
@@ -1043,9 +1062,11 @@ define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) {
 define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f32_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f32_v2i64:
@@ -1167,6 +1188,7 @@ define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs w8, d0
 ; CHECK-NEXT:    mov z0.h, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i16:
@@ -1186,8 +1208,10 @@ define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f64_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i16:
@@ -1219,6 +1243,7 @@ define <4 x i16> @fcvtzu_v4f64_v4i16(ptr %a) {
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i16:
@@ -1275,6 +1300,7 @@ define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) {
 ; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
 ; CHECK-NEXT:    zip1 z1.s, z2.s, z3.s
 ; CHECK-NEXT:    zip1 z0.d, z1.d, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v8f64_v8i16:
@@ -1478,8 +1504,10 @@ define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) {
 ; CHECK-LABEL: fcvtzu_v1f64_v1i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i32:
@@ -1499,8 +1527,10 @@ define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f64_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i32:
@@ -1529,6 +1559,7 @@ define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v4f64_v4i32:
@@ -1615,7 +1646,9 @@ define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) {
 ; CHECK-LABEL: fcvtzu_v1f64_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v1f64_v1i64:
@@ -1635,7 +1668,9 @@ define <2 x i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzu_v2f64_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzu_v2f64_v2i64:
@@ -1694,7 +1729,9 @@ define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) {
 ; CHECK-LABEL: fcvtzs_v4f16_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i16:
@@ -1876,9 +1913,11 @@ define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f16_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i32:
@@ -1902,9 +1941,11 @@ define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) {
 define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) {
 ; CHECK-LABEL: fcvtzs_v4f16_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f16_v4i32:
@@ -2116,12 +2157,14 @@ define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) {
 define <2 x i64> @fcvtzs_v2f16_v2i64(<2 x half> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f16_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.h, z0.h[1]
 ; CHECK-NEXT:    fcvtzs x8, h0
 ; CHECK-NEXT:    fcvtzs x9, h1
 ; CHECK-NEXT:    fmov d0, x8
 ; CHECK-NEXT:    fmov d1, x9
 ; CHECK-NEXT:    zip1 z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f16_v2i64:
@@ -2432,7 +2475,9 @@ define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f32_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i16:
@@ -2454,8 +2499,10 @@ define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v4f32_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i16:
@@ -2490,6 +2537,7 @@ define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v8f32_v8i16:
@@ -2612,7 +2660,9 @@ define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f32_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i32:
@@ -2634,7 +2684,9 @@ define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v4f32_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f32_v4i32:
@@ -2704,9 +2756,11 @@ define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v1f32_v1i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v1f32_v1i64:
@@ -2726,9 +2780,11 @@ define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) {
 define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f32_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f32_v2i64:
@@ -2852,6 +2908,7 @@ define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fcvtzs w8, d0
 ; CHECK-NEXT:    mov z0.h, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i16:
@@ -2871,8 +2928,10 @@ define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f64_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i16:
@@ -2904,6 +2963,7 @@ define <4 x i16> @fcvtzs_v4f64_v4i16(ptr %a) {
 ; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i16:
@@ -2960,6 +3020,7 @@ define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) {
 ; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
 ; CHECK-NEXT:    zip1 z1.s, z2.s, z3.s
 ; CHECK-NEXT:    zip1 z0.d, z1.d, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v8f64_v8i16:
@@ -3163,8 +3224,10 @@ define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i32:
@@ -3184,8 +3247,10 @@ define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f64_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i32:
@@ -3214,6 +3279,7 @@ define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v4f64_v4i32:
@@ -3300,7 +3366,9 @@ define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v1f64_v1i64:
@@ -3320,7 +3388,9 @@ define <2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f64_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fcvtzs_v2f64_v2i64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index 73b77c6dde4b23..ad5f91a5f39a49 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -8,14 +8,18 @@ target triple = "aarch64-unknown-linux-gnu"
 define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    mov z3.s, z2.s[1]
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    zip1 z2.h, z2.h, z3.h
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f16:
@@ -49,12 +53,16 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x i1> %mask
 define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z2.h, z2.h, #15
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f16:
@@ -101,13 +109,17 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x i1> %mask
 define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x i1> %mask) {
 ; CHECK-LABEL: select_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z2.h, z2.b
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    uunpklo z2.h, z2.b
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8f16:
@@ -326,12 +338,16 @@ define void @select_v16f16(ptr %a, ptr %b) {
 define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f32:
@@ -360,13 +376,17 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x i1> %m
 define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    uunpklo z2.s, z2.h
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4f32:
@@ -493,13 +513,17 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
 define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    uunpklo z2.d, z2.s
 ; CHECK-NEXT:    lsl z2.d, z2.d, #63
 ; CHECK-NEXT:    asr z2.d, z2.d, #63
 ; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index ae420059139d01..275d13ebfd9491 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -16,9 +16,11 @@ define <4 x i8> @insertelement_v4i8(<4 x i8> %op1) {
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4i8:
@@ -50,9 +52,11 @@ define <8 x i8> @insertelement_v8i8(<8 x i8> %op1) {
 ; CHECK-NEXT:    index z1.b, #0, #1
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z2.b, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z2.b
 ; CHECK-NEXT:    mov z0.b, p0/m, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v8i8:
@@ -86,9 +90,11 @@ define <16 x i8> @insertelement_v16i8(<16 x i8> %op1) {
 ; CHECK-NEXT:    index z1.b, #0, #1
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z2.b, w8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z2.b
 ; CHECK-NEXT:    mov z0.b, p0/m, w8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v16i8:
@@ -124,9 +130,11 @@ define <32 x i8> @insertelement_v32i8(<32 x i8> %op1) {
 ; CHECK-NEXT:    index z2.b, #0, #1
 ; CHECK-NEXT:    ptrue p0.b
 ; CHECK-NEXT:    mov z3.b, w8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z2.b, z3.b
 ; CHECK-NEXT:    mov z1.b, p0/m, w8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v32i8:
@@ -163,9 +171,11 @@ define <2 x i16> @insertelement_v2i16(<2 x i16> %op1) {
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2i16:
@@ -194,9 +204,11 @@ define <4 x i16> @insertelement_v4i16(<4 x i16> %op1) {
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4i16:
@@ -228,9 +240,11 @@ define <8 x i16> @insertelement_v8i16(<8 x i16> %op1) {
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
 ; CHECK-NEXT:    mov z0.h, p0/m, w8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v8i16:
@@ -264,9 +278,11 @@ define <16 x i16> @insertelement_v16i16(<16 x i16> %op1) {
 ; CHECK-NEXT:    index z2.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z3.h, w8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z2.h, z3.h
 ; CHECK-NEXT:    mov z1.h, p0/m, w8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v16i16:
@@ -301,9 +317,11 @@ define <2 x i32> @insertelement_v2i32(<2 x i32> %op1) {
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2i32:
@@ -332,9 +350,11 @@ define <4 x i32> @insertelement_v4i32(<4 x i32> %op1) {
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
 ; CHECK-NEXT:    mov z0.s, p0/m, w8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4i32:
@@ -369,6 +389,7 @@ define <8 x i32> @insertelement_v8i32(ptr %a) {
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    mov z1.s, p0/m, w8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v8i32:
@@ -398,6 +419,7 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) {
 ; CHECK-LABEL: insertelement_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, #5 // =0x5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v1i64:
@@ -420,9 +442,11 @@ define <2 x i64> @insertelement_v2i64(<2 x i64> %op1) {
 ; CHECK-NEXT:    index z1.d, #0, #1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov w8, #5 // =0x5
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
 ; CHECK-NEXT:    mov z0.d, p0/m, x8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2i64:
@@ -455,6 +479,7 @@ define <4 x i64> @insertelement_v4i64(ptr %a) {
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    mov z1.d, p0/m, x8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4i64:
@@ -482,7 +507,9 @@ define <2 x half> @insertelement_v2f16(<2 x half> %op1) {
 ; CHECK-LABEL: insertelement_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov h1, #5.00000000
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2f16:
@@ -508,9 +535,11 @@ define <4 x half> @insertelement_v4f16(<4 x half> %op1) {
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
 ; CHECK-NEXT:    fmov h1, #5.00000000
 ; CHECK-NEXT:    mov z0.h, p0/m, h1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4f16:
@@ -543,9 +572,11 @@ define <8 x half> @insertelement_v8f16(<8 x half> %op1) {
 ; CHECK-NEXT:    index z1.h, #0, #1
 ; CHECK-NEXT:    ptrue p0.h
 ; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
 ; CHECK-NEXT:    fmov h1, #5.00000000
 ; CHECK-NEXT:    mov z0.h, p0/m, h1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v8f16:
@@ -584,6 +615,7 @@ define <16 x half> @insertelement_v16f16(ptr %a) {
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    mov z1.h, p0/m, h2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v16f16:
@@ -620,9 +652,11 @@ define <2 x float> @insertelement_v2f32(<2 x float> %op1) {
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
 ; CHECK-NEXT:    fmov s1, #5.00000000
 ; CHECK-NEXT:    mov z0.s, p0/m, s1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2f32:
@@ -651,9 +685,11 @@ define <4 x float> @insertelement_v4f32(<4 x float> %op1) {
 ; CHECK-NEXT:    index z1.s, #0, #1
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    mov z2.s, w8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
 ; CHECK-NEXT:    fmov s1, #5.00000000
 ; CHECK-NEXT:    mov z0.s, p0/m, s1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4f32:
@@ -688,6 +724,7 @@ define <8 x float> @insertelement_v8f32(ptr %a) {
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    mov z1.s, p0/m, s2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v8f32:
@@ -740,9 +777,11 @@ define <2 x double> @insertelement_v2f64(<2 x double> %op1) {
 ; CHECK-NEXT:    index z1.d, #0, #1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
 ; CHECK-NEXT:    fmov d1, #5.00000000
 ; CHECK-NEXT:    mov z0.d, p0/m, d1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v2f64:
@@ -775,6 +814,7 @@ define <4 x double> @insertelement_v4f64(ptr %a) {
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    mov z1.d, p0/m, d2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: insertelement_v4f64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
index 95bf887e48913d..4360f3a12014a4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
@@ -12,7 +12,10 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i8> @add_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: add_v4i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v4i8:
@@ -46,7 +49,10 @@ define <4 x i8> @add_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @add_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: add_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.b, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v8i8:
@@ -96,7 +102,10 @@ define <8 x i8> @add_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @add_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: add_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.b, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v16i8:
@@ -334,7 +343,10 @@ define void @add_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @add_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: add_v2i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v2i16:
@@ -359,7 +371,10 @@ define <2 x i16> @add_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @add_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: add_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v4i16:
@@ -393,7 +408,10 @@ define <4 x i16> @add_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @add_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: add_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v8i16:
@@ -535,7 +553,10 @@ define void @add_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @add_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: add_v2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v2i32:
@@ -560,7 +581,10 @@ define <2 x i32> @add_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @add_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: add_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v4i32:
@@ -648,7 +672,10 @@ define void @add_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: add_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v1i64:
@@ -669,7 +696,10 @@ define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @add_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: add_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: add_v2i64:
@@ -741,12 +771,18 @@ define <4 x i8> @mul_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; SVE-LABEL: mul_v4i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v4i8:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v4i8:
@@ -781,12 +817,18 @@ define <8 x i8> @mul_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; SVE-LABEL: mul_v8i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl8
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.b, p0/m, z0.b, z1.b
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v8i8:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.b, z0.b, z1.b
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v8i8:
@@ -837,12 +879,18 @@ define <16 x i8> @mul_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; SVE-LABEL: mul_v16i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl16
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.b, p0/m, z0.b, z1.b
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v16i8:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.b, z0.b, z1.b
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v16i8:
@@ -1092,12 +1140,18 @@ define <2 x i16> @mul_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; SVE-LABEL: mul_v2i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v2i16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i16:
@@ -1122,12 +1176,18 @@ define <4 x i16> @mul_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; SVE-LABEL: mul_v4i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v4i16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v4i16:
@@ -1162,12 +1222,18 @@ define <8 x i16> @mul_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; SVE-LABEL: mul_v8i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl8
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v8i16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v8i16:
@@ -1321,12 +1387,18 @@ define <2 x i32> @mul_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; SVE-LABEL: mul_v2i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v2i32:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i32:
@@ -1351,12 +1423,18 @@ define <4 x i32> @mul_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; SVE-LABEL: mul_v4i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl4
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v4i32:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v4i32:
@@ -1450,12 +1528,18 @@ define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; SVE-LABEL: mul_v1i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl1
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v1i64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    mul z0.d, z0.d, z1.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v1i64:
@@ -1477,12 +1561,18 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE-LABEL: mul_v2i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl2
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: mul_v2i64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.d, z0.d, z1.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i64:
@@ -1561,7 +1651,10 @@ define void @mul_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @sub_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: sub_v4i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v4i8:
@@ -1595,7 +1688,10 @@ define <4 x i8> @sub_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @sub_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: sub_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.b, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v8i8:
@@ -1645,7 +1741,10 @@ define <8 x i8> @sub_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @sub_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: sub_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.b, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v16i8:
@@ -1883,7 +1982,10 @@ define void @sub_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @sub_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: sub_v2i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v2i16:
@@ -1908,7 +2010,10 @@ define <2 x i16> @sub_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @sub_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: sub_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v4i16:
@@ -1942,7 +2047,10 @@ define <4 x i16> @sub_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @sub_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: sub_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v8i16:
@@ -2084,7 +2192,10 @@ define void @sub_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @sub_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: sub_v2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v2i32:
@@ -2109,7 +2220,10 @@ define <2 x i32> @sub_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @sub_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: sub_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v4i32:
@@ -2197,7 +2311,10 @@ define void @sub_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: sub_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sub z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v1i64:
@@ -2218,7 +2335,10 @@ define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @sub_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: sub_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sub z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sub_v2i64:
@@ -2290,8 +2410,10 @@ define <4 x i8> @abs_v4i8(<4 x i8> %op1) {
 ; CHECK-LABEL: abs_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    abs z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v4i8:
@@ -2325,7 +2447,9 @@ define <8 x i8> @abs_v8i8(<8 x i8> %op1) {
 ; CHECK-LABEL: abs_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v8i8:
@@ -2375,7 +2499,9 @@ define <16 x i8> @abs_v16i8(<16 x i8> %op1) {
 ; CHECK-LABEL: abs_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v16i8:
@@ -2610,8 +2736,10 @@ define <2 x i16> @abs_v2i16(<2 x i16> %op1) {
 ; CHECK-LABEL: abs_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v2i16:
@@ -2636,7 +2764,9 @@ define <4 x i16> @abs_v4i16(<4 x i16> %op1) {
 ; CHECK-LABEL: abs_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v4i16:
@@ -2670,7 +2800,9 @@ define <8 x i16> @abs_v8i16(<8 x i16> %op1) {
 ; CHECK-LABEL: abs_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v8i16:
@@ -2809,7 +2941,9 @@ define <2 x i32> @abs_v2i32(<2 x i32> %op1) {
 ; CHECK-LABEL: abs_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v2i32:
@@ -2834,7 +2968,9 @@ define <4 x i32> @abs_v4i32(<4 x i32> %op1) {
 ; CHECK-LABEL: abs_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v4i32:
@@ -2919,7 +3055,9 @@ define <1 x i64> @abs_v1i64(<1 x i64> %op1) {
 ; CHECK-LABEL: abs_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    abs z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v1i64:
@@ -2941,7 +3079,9 @@ define <2 x i64> @abs_v2i64(<2 x i64> %op1) {
 ; CHECK-LABEL: abs_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    abs z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: abs_v2i64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
index 843d146051fff0..ba20de65a253a1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
@@ -13,8 +13,11 @@ define <8 x i8> @icmp_eq_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: icmp_eq_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v8i8:
@@ -74,8 +77,11 @@ define <16 x i8> @icmp_eq_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: icmp_eq_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, z1.b
 ; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v16i8:
@@ -367,8 +373,11 @@ define <4 x i16> @icmp_eq_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: icmp_eq_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v4i16:
@@ -408,8 +417,11 @@ define <8 x i16> @icmp_eq_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: icmp_eq_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v8i16:
@@ -581,8 +593,11 @@ define <2 x i32> @icmp_eq_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: icmp_eq_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v2i32:
@@ -610,8 +625,11 @@ define <4 x i32> @icmp_eq_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: icmp_eq_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v4i32:
@@ -711,8 +729,11 @@ define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: icmp_eq_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v1i64:
@@ -736,8 +757,11 @@ define <2 x i64> @icmp_eq_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: icmp_eq_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT:    mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: icmp_eq_v2i64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index 6b5258ea35fb02..1fdcd4f8268708 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -10,10 +10,11 @@ target triple = "aarch64-unknown-linux-gnu"
 ;
 
 define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
-;
 ; CHECK-LABEL: sdiv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -21,6 +22,7 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i8:
@@ -54,9 +56,11 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z1.h, z1.b
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
@@ -70,6 +74,7 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i8:
@@ -119,16 +124,18 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.h, z1.b
 ; CHECK-NEXT:    sunpklo z3.h, z0.b
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    sunpklo z1.h, z1.b
 ; CHECK-NEXT:    sunpklo z4.s, z2.h
 ; CHECK-NEXT:    sunpklo z5.s, z3.h
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT:    sunpklo z1.h, z1.b
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z2.s, z2.h
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
@@ -141,18 +148,19 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    splice z1.h, p0, { z3.h, z4.h }
+; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v16i8:
@@ -258,7 +266,6 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z4.h, z2.b
 ; CHECK-NEXT:    sunpklo z2.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    sunpklo z5.s, z4.h
 ; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
@@ -267,7 +274,6 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q5, [x0]
 ; CHECK-NEXT:    sunpklo z16.h, z5.b
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    sunpklo z5.h, z5.b
 ; CHECK-NEXT:    sunpklo z18.s, z16.h
 ; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
@@ -276,32 +282,34 @@ define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z18.s, z5.h
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
 ; CHECK-NEXT:    sunpklo z5.s, z5.h
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
 ; CHECK-NEXT:    sunpklo z16.s, z6.h
 ; CHECK-NEXT:    ext z6.b, z6.b, z6.b, #8
 ; CHECK-NEXT:    sunpklo z6.s, z6.h
+; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
 ; CHECK-NEXT:    sdivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z18.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z19.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z21.h, z7.h, z7.h
 ; CHECK-NEXT:    sdiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    uzp1 z6.h, z17.h, z17.h
+; CHECK-NEXT:    uzp1 z0.h, z16.h, z16.h
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT:    uzp1 z4.h, z16.h, z16.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT:    splice z2.h, p0, { z20.h, z21.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
 ; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    uzp1 z3.b, z6.b, z6.b
+; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
-; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
-; CHECK-NEXT:    stp q2, q0, [x0]
+; CHECK-NEXT:    uzp1 z2.b, z3.b, z3.b
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v32i8:
@@ -455,9 +463,12 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i16:
@@ -482,11 +493,14 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i16:
@@ -520,11 +534,13 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sdivr z2.s, p0/m, z2.s, z3.s
@@ -533,6 +549,7 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i16:
@@ -600,14 +617,14 @@ define void @sdiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sdivr z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT:    sdiv z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -698,7 +715,10 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i32:
@@ -723,7 +743,10 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i32:
@@ -808,7 +831,10 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v1i64:
@@ -830,7 +856,10 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    sdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i64:
@@ -898,13 +927,16 @@ define void @sdiv_v4i64(ptr %a, ptr %b)  {
 define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: udiv_v4i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v4i8:
@@ -938,9 +970,11 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: udiv_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.h, z1.b
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
@@ -954,6 +988,7 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v8i8:
@@ -1003,16 +1038,18 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: udiv_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.h, z1.b
 ; CHECK-NEXT:    uunpklo z3.h, z0.b
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    ptrue p0.s, vl4
-; CHECK-NEXT:    uunpklo z1.h, z1.b
 ; CHECK-NEXT:    uunpklo z4.s, z2.h
 ; CHECK-NEXT:    uunpklo z5.s, z3.h
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT:    uunpklo z1.h, z1.b
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z2.s, z2.h
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
@@ -1025,18 +1062,19 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    splice z1.h, p0, { z3.h, z4.h }
+; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v16i8:
@@ -1142,7 +1180,6 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uunpklo z4.h, z2.b
 ; CHECK-NEXT:    uunpklo z2.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    uunpklo z5.s, z4.h
 ; CHECK-NEXT:    ext z4.b, z4.b, z4.b, #8
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
@@ -1151,7 +1188,6 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q5, [x0]
 ; CHECK-NEXT:    uunpklo z16.h, z5.b
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uunpklo z5.h, z5.b
 ; CHECK-NEXT:    uunpklo z18.s, z16.h
 ; CHECK-NEXT:    ext z16.b, z16.b, z16.b, #8
@@ -1160,32 +1196,34 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uunpklo z18.s, z5.h
 ; CHECK-NEXT:    ext z5.b, z5.b, z5.b, #8
 ; CHECK-NEXT:    uunpklo z5.s, z5.h
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
 ; CHECK-NEXT:    uunpklo z16.s, z6.h
 ; CHECK-NEXT:    ext z6.b, z6.b, z6.b, #8
 ; CHECK-NEXT:    uunpklo z6.s, z6.h
+; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
 ; CHECK-NEXT:    udivr z16.s, p0/m, z16.s, z18.s
-; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z18.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z19.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z21.h, z7.h, z7.h
 ; CHECK-NEXT:    udiv z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT:    uzp1 z6.h, z17.h, z17.h
+; CHECK-NEXT:    uzp1 z0.h, z16.h, z16.h
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT:    uzp1 z4.h, z16.h, z16.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT:    splice z2.h, p0, { z20.h, z21.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
 ; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    uzp1 z3.b, z6.b, z6.b
+; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
-; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
-; CHECK-NEXT:    stp q2, q0, [x0]
+; CHECK-NEXT:    uzp1 z2.b, z3.b, z3.b
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z3.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v32i8:
@@ -1338,10 +1376,13 @@ define void @udiv_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: udiv_v2i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v2i16:
@@ -1366,11 +1407,14 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: udiv_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v4i16:
@@ -1404,11 +1448,13 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: udiv_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
@@ -1417,6 +1463,7 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v8i16:
@@ -1484,14 +1531,14 @@ define void @udiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    udivr z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT:    udiv z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    uzp1 z1.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    stp q0, q1, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -1582,7 +1629,10 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: udiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v2i32:
@@ -1607,7 +1657,10 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: udiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v4i32:
@@ -1692,7 +1745,10 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: udiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v1i64:
@@ -1714,7 +1770,10 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: udiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    udiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: udiv_v2i64:
@@ -1846,14 +1905,14 @@ define void @udiv_constantsplat_v8i32(ptr %a)  {
 ; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w10
 ; NONEON-NOSVE-NEXT:    add w9, w10, w9, lsr #1
-; NONEON-NOSVE-NEXT:    lsr w10, w9, #6
+; NONEON-NOSVE-NEXT:    lsr w11, w9, #6
 ; NONEON-NOSVE-NEXT:    ldr w9, [sp]
 ; NONEON-NOSVE-NEXT:    umull x8, w9, w8
 ; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
 ; NONEON-NOSVE-NEXT:    sub w9, w9, w8
 ; NONEON-NOSVE-NEXT:    add w8, w8, w9, lsr #1
 ; NONEON-NOSVE-NEXT:    lsr w8, w8, #6
-; NONEON-NOSVE-NEXT:    stp w8, w10, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp w8, w11, [sp, #32]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #64

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
index 1d35e9376cc682..25a6ea490c163d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll
@@ -16,6 +16,7 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i1_v8i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -89,6 +90,7 @@ define void @sext_v8i1_v8i32(<8 x i1> %a, ptr %out) {
 define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i3_v4i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -131,6 +133,7 @@ define void @sext_v4i3_v4i64(<4 x i3> %a, ptr %out) {
 define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v16i8_v16i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
@@ -418,6 +421,7 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) {
 define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i8_v8i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -472,14 +476,15 @@ define void @sext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 define void @sext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v16i8_v16i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    stp q2, q1, [x0]
 ; CHECK-NEXT:    stp q3, q0, [x0, #32]
@@ -886,8 +891,9 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) {
 define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i8_v4i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
@@ -921,6 +927,7 @@ define void @sext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i8_v8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -997,30 +1004,31 @@ define void @sext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 define void @sext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: sext_v16i8_v16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.h, z0.b
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z1.s, z1.h
 ; CHECK-NEXT:    sunpklo z4.d, z2.s
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
-; CHECK-NEXT:    sunpklo z5.d, z3.s
-; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT:    sunpklo z6.d, z1.s
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    sunpklo z5.d, z3.s
+; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT:    sunpklo z2.d, z2.s
+; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    sunpklo z7.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z3.d, z3.s
-; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    stp q4, q2, [x0]
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q5, q3, [x0, #64]
 ; CHECK-NEXT:    stp q6, q1, [x0, #32]
+; CHECK-NEXT:    stp q5, q3, [x0, #64]
 ; CHECK-NEXT:    stp q7, q0, [x0, #96]
 ; CHECK-NEXT:    ret
 ;
@@ -1577,6 +1585,7 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) {
 define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i16_v8i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
@@ -1728,6 +1737,7 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) {
 define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i16_v4i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -1764,14 +1774,15 @@ define void @sext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 define void @sext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: sext_v8i16_v8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    sunpklo z2.d, z1.s
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    sunpklo z3.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    sunpklo z1.d, z1.s
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    stp q2, q1, [x0]
 ; CHECK-NEXT:    stp q3, q0, [x0, #32]
@@ -1983,6 +1994,7 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) {
 define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
 ; CHECK-LABEL: sext_v4i32_v4i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
@@ -2078,6 +2090,7 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) {
 define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v16i8_v16i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
@@ -2365,6 +2378,7 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) {
 define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i8_v8i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -2419,14 +2433,15 @@ define void @zext_v8i8_v8i32(<8 x i8> %a, ptr %out) {
 define void @zext_v16i8_v16i32(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v16i8_v16i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    stp q2, q1, [x0]
 ; CHECK-NEXT:    stp q3, q0, [x0, #32]
@@ -2833,6 +2848,7 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) {
 define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v4i8_v4i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
@@ -2872,6 +2888,7 @@ define void @zext_v4i8_v4i64(<4 x i8> %a, ptr %out) {
 define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i8_v8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -2952,30 +2969,31 @@ define void @zext_v8i8_v8i64(<8 x i8> %a, ptr %out) {
 define void @zext_v16i8_v16i64(<16 x i8> %a, ptr %out) {
 ; CHECK-LABEL: zext_v16i8_v16i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z1.s, z1.h
 ; CHECK-NEXT:    uunpklo z4.d, z2.s
 ; CHECK-NEXT:    ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    uunpklo z5.d, z3.s
-; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT:    uunpklo z6.d, z1.s
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    uunpklo z5.d, z3.s
+; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    uunpklo z7.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z3.d, z3.s
-; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    stp q4, q2, [x0]
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    stp q5, q3, [x0, #64]
 ; CHECK-NEXT:    stp q6, q1, [x0, #32]
+; CHECK-NEXT:    stp q5, q3, [x0, #64]
 ; CHECK-NEXT:    stp q7, q0, [x0, #96]
 ; CHECK-NEXT:    ret
 ;
@@ -3585,6 +3603,7 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) {
 define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i16_v8i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
@@ -3736,6 +3755,7 @@ define void @zext_v16i16_v16i32(ptr %in, ptr %out) {
 define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 ; CHECK-LABEL: zext_v4i16_v4i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
@@ -3774,14 +3794,15 @@ define void @zext_v4i16_v4i64(<4 x i16> %a, ptr %out) {
 define void @zext_v8i16_v8i64(<8 x i16> %a, ptr %out) {
 ; CHECK-LABEL: zext_v8i16_v8i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.s, z0.h
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z2.d, z1.s
 ; CHECK-NEXT:    ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    uunpklo z3.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    uunpklo z1.d, z1.s
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    stp q2, q1, [x0]
 ; CHECK-NEXT:    stp q3, q0, [x0, #32]
@@ -4017,6 +4038,7 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) {
 define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) {
 ; CHECK-LABEL: zext_v4i32_v4i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
@@ -4116,6 +4138,7 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    mov z1.s, w0
 ; SVE-NEXT:    ptrue p0.d, vl2
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT:    uunpklo z1.d, z1.s
 ; SVE-NEXT:    mul z0.d, p0/m, z0.d, z1.d
 ; SVE-NEXT:    str q0, [x1]
@@ -4124,6 +4147,7 @@ define void @extend_and_mul(i32 %0, <2 x i64> %1, ptr %2) {
 ; SVE2-LABEL: extend_and_mul:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    mov z1.s, w0
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; SVE2-NEXT:    uunpklo z1.d, z1.s
 ; SVE2-NEXT:    mul z0.d, z1.d, z0.d
 ; SVE2-NEXT:    str q0, [x1]

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
index 566621f4d1635d..687dd9445f387b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
@@ -12,7 +12,10 @@ target triple = "aarch64-unknown-linux-gnu"
 define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: and_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v8i8:
@@ -62,7 +65,10 @@ define <8 x i8> @and_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @and_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: and_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v16i8:
@@ -300,7 +306,10 @@ define void @and_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: and_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v4i16:
@@ -334,7 +343,10 @@ define <4 x i16> @and_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @and_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: and_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v8i16:
@@ -476,7 +488,10 @@ define void @and_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: and_v2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v2i32:
@@ -501,7 +516,10 @@ define <2 x i32> @and_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @and_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: and_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v4i32:
@@ -589,7 +607,10 @@ define void @and_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: and_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v1i64:
@@ -610,7 +631,10 @@ define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @and_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: and_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: and_v2i64:
@@ -681,7 +705,10 @@ define void @and_v4i64(ptr %a, ptr %b) {
 define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: or_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v8i8:
@@ -731,7 +758,10 @@ define <8 x i8> @or_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @or_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: or_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v16i8:
@@ -969,7 +999,10 @@ define void @or_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: or_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v4i16:
@@ -1003,7 +1036,10 @@ define <4 x i16> @or_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @or_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: or_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v8i16:
@@ -1145,7 +1181,10 @@ define void @or_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: or_v2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v2i32:
@@ -1170,7 +1209,10 @@ define <2 x i32> @or_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @or_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: or_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v4i32:
@@ -1258,7 +1300,10 @@ define void @or_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: or_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v1i64:
@@ -1279,7 +1324,10 @@ define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @or_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: or_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: or_v2i64:
@@ -1350,7 +1398,10 @@ define void @or_v4i64(ptr %a, ptr %b) {
 define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: xor_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v8i8:
@@ -1400,7 +1451,10 @@ define <8 x i8> @xor_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @xor_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: xor_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v16i8:
@@ -1638,7 +1692,10 @@ define void @xor_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: xor_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v4i16:
@@ -1672,7 +1729,10 @@ define <4 x i16> @xor_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @xor_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: xor_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v8i16:
@@ -1814,7 +1874,10 @@ define void @xor_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: xor_v2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v2i32:
@@ -1839,7 +1902,10 @@ define <2 x i32> @xor_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @xor_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: xor_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v4i32:
@@ -1927,7 +1993,10 @@ define void @xor_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: xor_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v1i64:
@@ -1948,7 +2017,10 @@ define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 define <2 x i64> @xor_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: xor_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: xor_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
index 1aaec95cd9624c..1bca7dd09d9b7a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
@@ -13,7 +13,10 @@ define <8 x i8> @smax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: smax_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v8i8:
@@ -72,7 +75,10 @@ define <16 x i8> @smax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: smax_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v16i8:
@@ -361,7 +367,10 @@ define <4 x i16> @smax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: smax_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v4i16:
@@ -400,7 +409,10 @@ define <8 x i16> @smax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: smax_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v8i16:
@@ -569,7 +581,10 @@ define <2 x i32> @smax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: smax_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v2i32:
@@ -596,7 +611,10 @@ define <4 x i32> @smax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: smax_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v4i32:
@@ -694,7 +712,10 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: smax_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v1i64:
@@ -718,7 +739,10 @@ define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: smax_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smax_v2i64:
@@ -795,7 +819,10 @@ define <8 x i8> @smin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: smin_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v8i8:
@@ -854,7 +881,10 @@ define <16 x i8> @smin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: smin_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v16i8:
@@ -1143,7 +1173,10 @@ define <4 x i16> @smin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: smin_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v4i16:
@@ -1182,7 +1215,10 @@ define <8 x i16> @smin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: smin_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v8i16:
@@ -1351,7 +1387,10 @@ define <2 x i32> @smin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: smin_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v2i32:
@@ -1378,7 +1417,10 @@ define <4 x i32> @smin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: smin_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v4i32:
@@ -1476,7 +1518,10 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: smin_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v1i64:
@@ -1500,7 +1545,10 @@ define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: smin_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smin_v2i64:
@@ -1577,7 +1625,10 @@ define <8 x i8> @umax_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: umax_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v8i8:
@@ -1636,7 +1687,10 @@ define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: umax_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v16i8:
@@ -1925,7 +1979,10 @@ define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: umax_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v4i16:
@@ -1964,7 +2021,10 @@ define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: umax_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v8i16:
@@ -2133,7 +2193,10 @@ define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: umax_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v2i32:
@@ -2160,7 +2223,10 @@ define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: umax_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v4i32:
@@ -2258,7 +2324,10 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: umax_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v1i64:
@@ -2282,7 +2351,10 @@ define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: umax_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umax_v2i64:
@@ -2359,7 +2431,10 @@ define <8 x i8> @umin_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: umin_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v8i8:
@@ -2418,7 +2493,10 @@ define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: umin_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v16i8:
@@ -2707,7 +2785,10 @@ define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: umin_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v4i16:
@@ -2746,7 +2827,10 @@ define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: umin_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v8i16:
@@ -2915,7 +2999,10 @@ define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: umin_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v2i32:
@@ -2942,7 +3029,10 @@ define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: umin_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v4i32:
@@ -3040,7 +3130,10 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: umin_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v1i64:
@@ -3064,7 +3157,10 @@ define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: umin_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umin_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
index 793c5ad625930a..94d5bb1543b0e0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mla-neon-fa64.ll
@@ -15,7 +15,11 @@ define <8 x i8> @mla8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) {
 ; NO-FA64-LABEL: mla8xi8:
 ; NO-FA64:       // %bb.0:
 ; NO-FA64-NEXT:    ptrue p0.b, vl8
+; NO-FA64-NEXT:    // kill: def $d0 killed $d0 def $z0
+; NO-FA64-NEXT:    // kill: def $d2 killed $d2 def $z2
+; NO-FA64-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; NO-FA64-NEXT:    mad z0.b, p0/m, z1.b, z2.b
+; NO-FA64-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; NO-FA64-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mla8xi8:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
index 007262750c56b8..319fa5c8458275 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mul.ll
@@ -12,7 +12,10 @@ target triple = "aarch64-unknown-linux-gnu"
 define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE2-LABEL: mul_v2i64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    mul z0.d, z0.d, z1.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: mul_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
index 269d684ff22027..b0fdce9a93bd3b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
@@ -17,19 +17,25 @@ define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; SVE-LABEL: smulh_v4i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT:    sxtb z0.h, p0/m, z0.h
 ; SVE-NEXT:    sxtb z1.h, p0/m, z1.h
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
 ; SVE-NEXT:    lsr z0.h, z0.h, #4
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v4i8:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    ptrue p0.h, vl4
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    sxtb z0.h, p0/m, z0.h
 ; SVE2-NEXT:    sxtb z1.h, p0/m, z1.h
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
 ; SVE2-NEXT:    lsr z0.h, z0.h, #4
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v4i8:
@@ -74,12 +80,18 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; SVE-LABEL: smulh_v8i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl8
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v8i8:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.b, z0.b, z1.b
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v8i8:
@@ -144,12 +156,18 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; SVE-LABEL: smulh_v16i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl16
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v16i8:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.b, z0.b, z1.b
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v16i8:
@@ -557,19 +575,25 @@ define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; SVE-LABEL: smulh_v2i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT:    sxth z0.s, p0/m, z0.s
 ; SVE-NEXT:    sxth z1.s, p0/m, z1.s
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
 ; SVE-NEXT:    lsr z0.s, z0.s, #16
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v2i16:
 ; SVE2:       // %bb.0:
 ; SVE2-NEXT:    ptrue p0.s, vl2
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    sxth z0.s, p0/m, z0.s
 ; SVE2-NEXT:    sxth z1.s, p0/m, z1.s
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
 ; SVE2-NEXT:    lsr z0.s, z0.s, #16
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v2i16:
@@ -601,12 +625,18 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; SVE-LABEL: smulh_v4i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v4i16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.h, z0.h, z1.h
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v4i16:
@@ -649,12 +679,18 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; SVE-LABEL: smulh_v8i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl8
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v8i16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.h, z0.h, z1.h
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v8i16:
@@ -874,12 +910,18 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; SVE-LABEL: smulh_v2i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v2i32:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.s, z0.s, z1.s
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v2i32:
@@ -909,12 +951,18 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; SVE-LABEL: smulh_v4i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl4
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v4i32:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.s, z0.s, z1.s
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v4i32:
@@ -1035,12 +1083,18 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; SVE-LABEL: smulh_v1i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl1
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v1i64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    smulh z0.d, z0.d, z1.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v1i64:
@@ -1068,12 +1122,18 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE-LABEL: smulh_v2i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl2
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: smulh_v2i64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    smulh z0.d, z0.d, z1.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: smulh_v2i64:
@@ -1163,19 +1223,25 @@ define void @smulh_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; SVE-LABEL: umulh_v4i8:
 ; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    ptrue p0.h, vl4
 ; SVE-NEXT:    and z0.h, z0.h, #0xff
 ; SVE-NEXT:    and z1.h, z1.h, #0xff
-; SVE-NEXT:    ptrue p0.h, vl4
 ; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
 ; SVE-NEXT:    lsr z0.h, z0.h, #4
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v4i8:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    and z0.h, z0.h, #0xff
 ; SVE2-NEXT:    and z1.h, z1.h, #0xff
 ; SVE2-NEXT:    mul z0.h, z0.h, z1.h
 ; SVE2-NEXT:    lsr z0.h, z0.h, #4
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v4i8:
@@ -1218,12 +1284,18 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; SVE-LABEL: umulh_v8i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl8
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v8i8:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.b, z0.b, z1.b
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v8i8:
@@ -1286,12 +1358,18 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; SVE-LABEL: umulh_v16i8:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.b, vl16
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v16i8:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.b, z0.b, z1.b
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v16i8:
@@ -1698,19 +1776,25 @@ define void @umulh_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; SVE-LABEL: umulh_v2i16:
 ; SVE:       // %bb.0:
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    ptrue p0.s, vl2
 ; SVE-NEXT:    and z0.s, z0.s, #0xffff
 ; SVE-NEXT:    and z1.s, z1.s, #0xffff
-; SVE-NEXT:    ptrue p0.s, vl2
 ; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
 ; SVE-NEXT:    lsr z0.s, z0.s, #16
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v2i16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; SVE2-NEXT:    and z0.s, z0.s, #0xffff
 ; SVE2-NEXT:    and z1.s, z1.s, #0xffff
 ; SVE2-NEXT:    mul z0.s, z0.s, z1.s
 ; SVE2-NEXT:    lsr z0.s, z0.s, #16
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v2i16:
@@ -1742,12 +1826,18 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; SVE-LABEL: umulh_v4i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl4
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v4i16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.h, z0.h, z1.h
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v4i16:
@@ -1790,12 +1880,18 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; SVE-LABEL: umulh_v8i16:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.h, vl8
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v8i16:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.h, z0.h, z1.h
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v8i16:
@@ -2015,12 +2111,18 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; SVE-LABEL: umulh_v2i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl2
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v2i32:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.s, z0.s, z1.s
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v2i32:
@@ -2050,12 +2152,18 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; SVE-LABEL: umulh_v4i32:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.s, vl4
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v4i32:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.s, z0.s, z1.s
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v4i32:
@@ -2178,12 +2286,18 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; SVE-LABEL: umulh_v1i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl1
+; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
+; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v1i64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
+; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; SVE2-NEXT:    umulh z0.d, z0.d, z1.d
+; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v1i64:
@@ -2209,12 +2323,18 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE-LABEL: umulh_v2i64:
 ; SVE:       // %bb.0:
 ; SVE-NEXT:    ptrue p0.d, vl2
+; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
+; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE-NEXT:    ret
 ;
 ; SVE2-LABEL: umulh_v2i64:
 ; SVE2:       // %bb.0:
+; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
+; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; SVE2-NEXT:    umulh z0.d, z0.d, z1.d
+; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; SVE2-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: umulh_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
index 6a01b342850e97..92a67cba55f7a1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll
@@ -13,8 +13,10 @@ define i8 @uaddv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: uaddv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v8i8:
@@ -47,8 +49,10 @@ define i8 @uaddv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: uaddv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v16i8:
@@ -100,6 +104,7 @@ define i8 @uaddv_v32i8(ptr %a) {
 ; CHECK-NEXT:    add z0.b, z1.b, z0.b
 ; CHECK-NEXT:    uaddv d0, p0, z0.b
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v32i8:
@@ -181,8 +186,10 @@ define i16 @uaddv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: uaddv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v4i16:
@@ -207,8 +214,10 @@ define i16 @uaddv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: uaddv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v8i16:
@@ -244,6 +253,7 @@ define i16 @uaddv_v16i16(ptr %a) {
 ; CHECK-NEXT:    add z0.h, z1.h, z0.h
 ; CHECK-NEXT:    uaddv d0, p0, z0.h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v16i16:
@@ -293,8 +303,10 @@ define i32 @uaddv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: uaddv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v2i32:
@@ -314,8 +326,10 @@ define i32 @uaddv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: uaddv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v4i32:
@@ -340,6 +354,7 @@ define i32 @uaddv_v8i32(ptr %a) {
 ; CHECK-NEXT:    add z0.s, z1.s, z0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: uaddv_v8i32:
@@ -369,6 +384,7 @@ define i64 @uaddv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: uaddv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uaddv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -418,6 +434,7 @@ define i8 @smaxv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: smaxv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    smaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -459,6 +476,7 @@ define i8 @smaxv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: smaxv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -639,6 +657,7 @@ define i16 @smaxv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: smaxv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    smaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -668,6 +687,7 @@ define i16 @smaxv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: smaxv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -776,6 +796,7 @@ define i32 @smaxv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: smaxv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    smaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -798,6 +819,7 @@ define i32 @smaxv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: smaxv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -865,6 +887,7 @@ define i64 @smaxv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: smaxv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    smaxv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -919,6 +942,7 @@ define i8 @sminv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: sminv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -960,6 +984,7 @@ define i8 @sminv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: sminv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1140,6 +1165,7 @@ define i16 @sminv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: sminv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1169,6 +1195,7 @@ define i16 @sminv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: sminv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1277,6 +1304,7 @@ define i32 @sminv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: sminv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1299,6 +1327,7 @@ define i32 @sminv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: sminv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1366,6 +1395,7 @@ define i64 @sminv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: sminv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1420,6 +1450,7 @@ define i8 @umaxv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: umaxv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1461,6 +1492,7 @@ define i8 @umaxv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: umaxv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1641,6 +1673,7 @@ define i16 @umaxv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: umaxv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    umaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1670,6 +1703,7 @@ define i16 @umaxv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: umaxv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1778,6 +1812,7 @@ define i32 @umaxv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: umaxv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    umaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1800,6 +1835,7 @@ define i32 @umaxv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: umaxv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1867,6 +1903,7 @@ define i64 @umaxv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: umaxv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    umaxv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -1921,6 +1958,7 @@ define i8 @uminv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: uminv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1962,6 +2000,7 @@ define i8 @uminv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: uminv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2142,6 +2181,7 @@ define i16 @uminv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: uminv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2171,6 +2211,7 @@ define i16 @uminv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: uminv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2279,6 +2320,7 @@ define i32 @uminv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: uminv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2301,6 +2343,7 @@ define i32 @uminv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: uminv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -2368,6 +2411,7 @@ define i64 @uminv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: uminv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uminv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index b351fec3c87e7c..9497ec88e57b4d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -13,6 +13,8 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: srem_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p1.s, vl4
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
@@ -21,6 +23,7 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-NEXT:    sdivr z2.s, p1/m, z2.s, z3.s
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v4i8:
@@ -58,6 +61,8 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: srem_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z2.h, z1.b
 ; CHECK-NEXT:    sunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -76,6 +81,7 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v8i8:
@@ -133,6 +139,8 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: srem_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.h, z1.b
 ; CHECK-NEXT:    sunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -152,25 +160,26 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    mov z3.d, z1.d
 ; CHECK-NEXT:    sunpklo z5.s, z5.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
 ; CHECK-NEXT:    sunpklo z3.h, z3.b
 ; CHECK-NEXT:    sunpklo z6.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
-; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
-; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
+; CHECK-NEXT:    uzp1 z7.h, z3.h, z3.h
+; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    uzp1 z5.b, z3.b, z3.b
+; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v16i8:
@@ -297,7 +306,6 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q3, [x0]
 ; CHECK-NEXT:    ldr q4, [x1]
 ; CHECK-NEXT:    sunpklo z16.s, z16.h
-; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
 ; CHECK-NEXT:    sunpklo z17.h, z4.b
 ; CHECK-NEXT:    sunpklo z18.h, z3.b
 ; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
@@ -307,11 +315,9 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    sunpklo z17.s, z17.h
 ; CHECK-NEXT:    sunpklo z18.s, z18.h
-; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
 ; CHECK-NEXT:    sdivr z19.s, p0/m, z19.s, z20.s
 ; CHECK-NEXT:    mov z20.d, z3.d
 ; CHECK-NEXT:    ext z20.b, z20.b, z3.b, #8
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
 ; CHECK-NEXT:    sunpklo z20.h, z20.b
 ; CHECK-NEXT:    sunpklo z22.s, z20.h
 ; CHECK-NEXT:    ext z20.b, z20.b, z20.b, #8
@@ -319,29 +325,32 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    mov z18.d, z4.d
 ; CHECK-NEXT:    sunpklo z20.s, z20.h
 ; CHECK-NEXT:    ext z18.b, z18.b, z4.b, #8
-; CHECK-NEXT:    uzp1 z16.h, z19.h, z19.h
 ; CHECK-NEXT:    sunpklo z18.h, z18.b
 ; CHECK-NEXT:    sunpklo z21.s, z18.h
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    sunpklo z18.s, z18.h
 ; CHECK-NEXT:    sdivr z21.s, p0/m, z21.s, z22.s
 ; CHECK-NEXT:    uzp1 z22.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
+; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
 ; CHECK-NEXT:    sdivr z18.s, p0/m, z18.s, z20.s
+; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
 ; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
 ; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
-; CHECK-NEXT:    uzp1 z19.h, z21.h, z21.h
-; CHECK-NEXT:    uzp1 z6.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z17.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z20.h, z18.h, z18.h
-; CHECK-NEXT:    uzp1 z18.b, z5.b, z5.b
-; CHECK-NEXT:    splice z16.h, p0, { z19.h, z20.h }
+; CHECK-NEXT:    uzp1 z16.h, z21.h, z21.h
+; CHECK-NEXT:    splice z2.h, p0, { z19.h, z20.h }
+; CHECK-NEXT:    uzp1 z6.b, z7.b, z7.b
+; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z17.h, z18.h, z18.h
+; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z17.b, z2.b, z2.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    splice z5.b, p0, { z17.b, z18.b }
-; CHECK-NEXT:    uzp1 z7.b, z16.b, z16.b
-; CHECK-NEXT:    splice z2.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    splice z5.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    uzp1 z18.b, z16.b, z16.b
+; CHECK-NEXT:    splice z2.b, p0, { z17.b, z18.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    mls z0.b, p0/m, z5.b, z1.b
 ; CHECK-NEXT:    msb z2.b, p0/m, z4.b, z3.b
@@ -530,6 +539,8 @@ define void @srem_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: srem_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -537,6 +548,7 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v4i16:
@@ -574,6 +586,8 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: srem_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    sunpklo z2.s, z1.h
 ; CHECK-NEXT:    sunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -586,11 +600,12 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    sunpklo z3.s, z3.h
 ; CHECK-NEXT:    sdivr z3.s, p0/m, z3.s, z4.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v8i16:
@@ -669,15 +684,15 @@ define void @srem_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    sdivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    mov z7.d, z1.d
 ; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
-; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    sunpklo z7.s, z7.h
 ; CHECK-NEXT:    sdivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    splice z2.h, p0, { z5.h, z6.h }
-; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
-; CHECK-NEXT:    splice z5.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
@@ -787,9 +802,12 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: srem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v2i32:
@@ -817,9 +835,12 @@ define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: srem_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v4i32:
@@ -925,9 +946,12 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: srem_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v1i64:
@@ -950,9 +974,12 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: srem_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    sdiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: srem_v2i64:
@@ -1035,15 +1062,18 @@ define void @srem_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: urem_v4i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
-; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    udivr z2.s, p0/m, z2.s, z3.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v4i8:
@@ -1081,6 +1111,8 @@ define <4 x i8> @urem_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: urem_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z2.h, z1.b
 ; CHECK-NEXT:    uunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1099,6 +1131,7 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v8i8:
@@ -1156,6 +1189,8 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: urem_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.h, z1.b
 ; CHECK-NEXT:    uunpklo z3.h, z0.b
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1175,25 +1210,26 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-NEXT:    mov z3.d, z1.d
 ; CHECK-NEXT:    uunpklo z5.s, z5.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z1.b, #8
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
 ; CHECK-NEXT:    uunpklo z3.h, z3.b
 ; CHECK-NEXT:    uunpklo z6.s, z3.h
 ; CHECK-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
-; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
-; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT:    splice z2.b, p0, { z3.b, z4.b }
+; CHECK-NEXT:    uzp1 z7.h, z3.h, z3.h
+; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    uzp1 z5.b, z3.b, z3.b
+; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v16i8:
@@ -1320,7 +1356,6 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldr q3, [x0]
 ; CHECK-NEXT:    ldr q4, [x1]
 ; CHECK-NEXT:    uunpklo z16.s, z16.h
-; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
 ; CHECK-NEXT:    uunpklo z17.h, z4.b
 ; CHECK-NEXT:    uunpklo z18.h, z3.b
 ; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
@@ -1330,11 +1365,9 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    uunpklo z17.s, z17.h
 ; CHECK-NEXT:    uunpklo z18.s, z18.h
-; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
 ; CHECK-NEXT:    udivr z19.s, p0/m, z19.s, z20.s
 ; CHECK-NEXT:    mov z20.d, z3.d
 ; CHECK-NEXT:    ext z20.b, z20.b, z3.b, #8
-; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
 ; CHECK-NEXT:    uunpklo z20.h, z20.b
 ; CHECK-NEXT:    uunpklo z22.s, z20.h
 ; CHECK-NEXT:    ext z20.b, z20.b, z20.b, #8
@@ -1342,29 +1375,32 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 ; CHECK-NEXT:    mov z18.d, z4.d
 ; CHECK-NEXT:    uunpklo z20.s, z20.h
 ; CHECK-NEXT:    ext z18.b, z18.b, z4.b, #8
-; CHECK-NEXT:    uzp1 z16.h, z19.h, z19.h
 ; CHECK-NEXT:    uunpklo z18.h, z18.b
 ; CHECK-NEXT:    uunpklo z21.s, z18.h
 ; CHECK-NEXT:    ext z18.b, z18.b, z18.b, #8
 ; CHECK-NEXT:    uunpklo z18.s, z18.h
 ; CHECK-NEXT:    udivr z21.s, p0/m, z21.s, z22.s
 ; CHECK-NEXT:    uzp1 z22.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
+; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
 ; CHECK-NEXT:    udivr z18.s, p0/m, z18.s, z20.s
+; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z20.h, z17.h, z17.h
 ; CHECK-NEXT:    splice z7.h, p0, { z22.h, z23.h }
 ; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
-; CHECK-NEXT:    uzp1 z19.h, z21.h, z21.h
-; CHECK-NEXT:    uzp1 z6.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z17.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z20.h, z18.h, z18.h
-; CHECK-NEXT:    uzp1 z18.b, z5.b, z5.b
-; CHECK-NEXT:    splice z16.h, p0, { z19.h, z20.h }
+; CHECK-NEXT:    uzp1 z16.h, z21.h, z21.h
+; CHECK-NEXT:    splice z2.h, p0, { z19.h, z20.h }
+; CHECK-NEXT:    uzp1 z6.b, z7.b, z7.b
+; CHECK-NEXT:    uzp1 z7.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z17.h, z18.h, z18.h
+; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z17.b, z2.b, z2.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    splice z5.b, p0, { z17.b, z18.b }
-; CHECK-NEXT:    uzp1 z7.b, z16.b, z16.b
-; CHECK-NEXT:    splice z2.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    splice z5.b, p0, { z6.b, z7.b }
+; CHECK-NEXT:    uzp1 z18.b, z16.b, z16.b
+; CHECK-NEXT:    splice z2.b, p0, { z17.b, z18.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    mls z0.b, p0/m, z5.b, z1.b
 ; CHECK-NEXT:    msb z2.b, p0/m, z4.b, z3.b
@@ -1553,6 +1589,8 @@ define void @urem_v32i8(ptr %a, ptr %b) {
 define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: urem_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1560,6 +1598,7 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v4i16:
@@ -1597,6 +1636,8 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: urem_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z2.s, z1.h
 ; CHECK-NEXT:    uunpklo z3.s, z0.h
 ; CHECK-NEXT:    ptrue p0.s, vl4
@@ -1609,11 +1650,12 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-NEXT:    uunpklo z3.s, z3.h
 ; CHECK-NEXT:    udivr z3.s, p0/m, z3.s, z4.s
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z4.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z5.h, z3.h, z3.h
+; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v8i16:
@@ -1692,15 +1734,15 @@ define void @urem_v16i16(ptr %a, ptr %b) {
 ; CHECK-NEXT:    udivr z6.s, p0/m, z6.s, z7.s
 ; CHECK-NEXT:    mov z7.d, z1.d
 ; CHECK-NEXT:    ext z7.b, z7.b, z1.b, #8
-; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
 ; CHECK-NEXT:    uunpklo z7.s, z7.h
 ; CHECK-NEXT:    udivr z7.s, p0/m, z7.s, z16.s
-; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z16.h, z5.h, z5.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    splice z2.h, p0, { z5.h, z6.h }
-; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
-; CHECK-NEXT:    splice z5.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z17.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z5.h, z2.h, z2.h
+; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z6.h, z7.h, z7.h
+; CHECK-NEXT:    splice z5.h, p0, { z5.h, z6.h }
 ; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    msb z2.h, p0/m, z4.h, z3.h
 ; CHECK-NEXT:    mls z0.h, p0/m, z5.h, z1.h
@@ -1810,9 +1852,12 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: urem_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v2i32:
@@ -1840,9 +1885,12 @@ define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: urem_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT:    mls z0.s, p0/m, z2.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v4i32:
@@ -1948,9 +1996,12 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: urem_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v1i64:
@@ -1973,9 +2024,12 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: urem_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    movprfx z2, z0
 ; CHECK-NEXT:    udiv z2.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT:    mls z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: urem_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
index 9be0b52676b380..4ac156c42fda00 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
@@ -10,9 +10,12 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i8:
@@ -49,8 +52,11 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.b, w0
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i8:
@@ -103,8 +109,11 @@ define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.b, w0
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v16i8:
@@ -354,9 +363,12 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i16:
@@ -383,9 +395,12 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i16:
@@ -422,9 +437,12 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.h, w0
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i16:
@@ -579,9 +597,12 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i32:
@@ -608,9 +629,12 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w0, #0x1
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.s, w8
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i32:
@@ -703,11 +727,15 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) {
 define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) {
 ; CHECK-LABEL: select_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v1i64:
@@ -729,11 +757,15 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) {
 define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) {
 ; CHECK-LABEL: select_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i64:
@@ -757,6 +789,7 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) {
 define void @select_v4i64(ptr %a, ptr %b, i1 %mask) {
 ; CHECK-LABEL: select_v4i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT:    and x8, x2, #0x1
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    mov z0.d, x8

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
index 7e508cab610a3d..d0f99211e80fce 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
@@ -13,9 +13,12 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: ashr_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v4i8:
@@ -50,7 +53,10 @@ define <8 x i8> @ashr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: ashr_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v8i8:
@@ -101,7 +107,10 @@ define <16 x i8> @ashr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: ashr_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v16i8:
@@ -342,9 +351,12 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: ashr_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v2i16:
@@ -370,7 +382,10 @@ define <4 x i16> @ashr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: ashr_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v4i16:
@@ -405,7 +420,10 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: ashr_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v8i16:
@@ -550,7 +568,10 @@ define <2 x i32> @ashr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: ashr_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v2i32:
@@ -575,7 +596,10 @@ define <4 x i32> @ashr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: ashr_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v4i32:
@@ -660,7 +684,10 @@ define <1 x i64> @ashr_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: ashr_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v1i64:
@@ -682,7 +709,10 @@ define <2 x i64> @ashr_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: ashr_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ashr_v2i64:
@@ -752,10 +782,13 @@ define void @ashr_v4i64(ptr %a, ptr %b) {
 define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: lshr_v4i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-NEXT:    and z0.h, z0.h, #0xff
-; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v4i8:
@@ -790,7 +823,10 @@ define <8 x i8> @lshr_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: lshr_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v8i8:
@@ -841,7 +877,10 @@ define <16 x i8> @lshr_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: lshr_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v16i8:
@@ -1081,10 +1120,13 @@ define void @lshr_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: lshr_v2i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    and z1.s, z1.s, #0xffff
 ; CHECK-NEXT:    and z0.s, z0.s, #0xffff
-; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v2i16:
@@ -1110,7 +1152,10 @@ define <4 x i16> @lshr_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: lshr_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v4i16:
@@ -1145,7 +1190,10 @@ define <8 x i16> @lshr_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: lshr_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v8i16:
@@ -1290,7 +1338,10 @@ define <2 x i32> @lshr_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: lshr_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v2i32:
@@ -1315,7 +1366,10 @@ define <4 x i32> @lshr_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: lshr_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v4i32:
@@ -1400,7 +1454,10 @@ define <1 x i64> @lshr_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: lshr_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v1i64:
@@ -1422,7 +1479,10 @@ define <2 x i64> @lshr_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: lshr_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: lshr_v2i64:
@@ -1492,9 +1552,12 @@ define void @lshr_v4i64(ptr %a, ptr %b) {
 define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) {
 ; CHECK-LABEL: shl_v2i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and z1.s, z1.s, #0xff
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    and z1.s, z1.s, #0xff
 ; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v2i8:
@@ -1519,9 +1582,12 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) {
 define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: shl_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    and z1.h, z1.h, #0xff
 ; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v4i8:
@@ -1556,7 +1622,10 @@ define <8 x i8> @shl_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: shl_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v8i8:
@@ -1607,7 +1676,10 @@ define <16 x i8> @shl_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: shl_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v16i8:
@@ -1848,7 +1920,10 @@ define <4 x i16> @shl_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: shl_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v4i16:
@@ -1883,7 +1958,10 @@ define <8 x i16> @shl_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: shl_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v8i16:
@@ -2028,7 +2106,10 @@ define <2 x i32> @shl_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: shl_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v2i32:
@@ -2053,7 +2134,10 @@ define <4 x i32> @shl_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: shl_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v4i32:
@@ -2138,7 +2222,10 @@ define <1 x i64> @shl_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: shl_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v1i64:
@@ -2160,7 +2247,10 @@ define <2 x i64> @shl_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: shl_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shl_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index d5fe91a5ac8a2c..e595686cb4975d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -12,7 +12,9 @@ define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) {
 ; CHECK-LABEL: ucvtf_v4i16_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f16:
@@ -194,9 +196,11 @@ define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) {
 define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) {
 ; CHECK-LABEL: ucvtf_v2i16_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i16_v2f32:
@@ -218,9 +222,11 @@ define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) {
 define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) {
 ; CHECK-LABEL: ucvtf_v4i16_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i16_v4f32:
@@ -382,6 +388,7 @@ define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) {
 define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
 ; CHECK-LABEL: ucvtf_v1i16_v1f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w8, w8, #0xffff
 ; CHECK-NEXT:    ucvtf d0, w8
@@ -404,10 +411,12 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
 define <2 x double> @ucvtf_v2i16_v2f64(<2 x i16> %op1) {
 ; CHECK-LABEL: ucvtf_v2i16_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i16_v2f64:
@@ -693,8 +702,10 @@ define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) {
 ; CHECK-LABEL: ucvtf_v2i32_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f16:
@@ -720,8 +731,10 @@ define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) {
 ; CHECK-LABEL: ucvtf_v4i32_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f16:
@@ -760,6 +773,7 @@ define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v8i32_v8f16:
@@ -906,7 +920,9 @@ define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) {
 ; CHECK-LABEL: ucvtf_v2i32_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f32:
@@ -928,7 +944,9 @@ define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) {
 ; CHECK-LABEL: ucvtf_v4i32_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i32_v4f32:
@@ -998,9 +1016,11 @@ define void @ucvtf_v8i32_v8f32(ptr %a, ptr %b) {
 define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) {
 ; CHECK-LABEL: ucvtf_v2i32_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i32_v2f64:
@@ -1120,12 +1140,14 @@ define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) {
 define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) {
 ; CHECK-LABEL: ucvtf_v2i64_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z1.d, z0.d[1]
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    fmov x9, d1
 ; CHECK-NEXT:    ucvtf h0, x8
 ; CHECK-NEXT:    ucvtf h1, x9
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f16:
@@ -1160,6 +1182,7 @@ define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f16:
@@ -1215,6 +1238,7 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) {
 ; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z0.h, z1.h, z1.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z2.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v8i64_v8f16:
@@ -1270,8 +1294,10 @@ define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) {
 ; CHECK-LABEL: ucvtf_v2i64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f32:
@@ -1300,6 +1326,7 @@ define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v4i64_v4f32:
@@ -1386,7 +1413,9 @@ define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) {
 ; CHECK-LABEL: ucvtf_v2i64_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ucvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: ucvtf_v2i64_v2f64:
@@ -1445,7 +1474,9 @@ define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) {
 ; CHECK-LABEL: scvtf_v4i16_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f16:
@@ -1627,8 +1658,10 @@ define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) {
 ; CHECK-LABEL: scvtf_v2i16_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i16_v2f32:
@@ -1650,9 +1683,11 @@ define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) {
 define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) {
 ; CHECK-LABEL: scvtf_v4i16_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i16_v4f32:
@@ -1815,10 +1850,12 @@ define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) {
 ; CHECK-LABEL: scvtf_v2i16_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i16_v2f64:
@@ -2104,8 +2141,10 @@ define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) {
 ; CHECK-LABEL: scvtf_v2i32_v2f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f16:
@@ -2131,8 +2170,10 @@ define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) {
 ; CHECK-LABEL: scvtf_v4i32_v4f16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    scvtf z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f16:
@@ -2171,6 +2212,7 @@ define <8 x half> @scvtf_v8i32_v8f16(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v8i32_v8f16:
@@ -2222,7 +2264,9 @@ define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) {
 ; CHECK-LABEL: scvtf_v2i32_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f32:
@@ -2244,7 +2288,9 @@ define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) {
 ; CHECK-LABEL: scvtf_v4i32_v4f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i32_v4f32:
@@ -2314,9 +2360,11 @@ define void @scvtf_v8i32_v8f32(ptr %a, ptr %b) {
 define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) {
 ; CHECK-LABEL: scvtf_v2i32_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sunpklo z0.d, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    sunpklo z0.d, z0.s
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i32_v2f64:
@@ -2546,12 +2594,14 @@ define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) {
 define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) {
 ; CHECK-LABEL: scvtf_v2i64_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z1.d, z0.d[1]
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    fmov x9, d1
 ; CHECK-NEXT:    scvtf h0, x8
 ; CHECK-NEXT:    scvtf h1, x9
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f16:
@@ -2586,6 +2636,7 @@ define <4 x half> @scvtf_v4i64_v4f16(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fcvt z0.h, p0/m, z0.s
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f16:
@@ -2623,8 +2674,10 @@ define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) {
 ; CHECK-LABEL: scvtf_v2i64_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f32:
@@ -2653,6 +2706,7 @@ define <4 x float> @scvtf_v4i64_v4f32(ptr %a) {
 ; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v4i64_v4f32:
@@ -2684,7 +2738,9 @@ define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) {
 ; CHECK-LABEL: scvtf_v2i64_v2f64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    scvtf z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: scvtf_v2i64_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index 1cd2606c3c3ffc..41eb731fd66dfe 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -8,12 +8,16 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z2.h, z2.h, #15
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i8:
@@ -60,12 +64,16 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, <4 x i1> %mask) {
 define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) {
 ; CHECK-LABEL: select_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z2.b, z2.b, #7
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    lsl z2.b, z2.b, #7
 ; CHECK-NEXT:    asr z2.b, z2.b, #7
 ; CHECK-NEXT:    and z2.b, z2.b, #0x1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i8:
@@ -140,12 +148,16 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) {
 define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) {
 ; CHECK-LABEL: select_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z2.b, z2.b, #7
+; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    lsl z2.b, z2.b, #7
 ; CHECK-NEXT:    asr z2.b, z2.b, #7
 ; CHECK-NEXT:    and z2.b, z2.b, #0x1
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z2.b, #0
 ; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v16i8:
@@ -498,12 +510,16 @@ define void @select_v32i8(ptr %a, ptr %b) {
 define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i16:
@@ -535,12 +551,16 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, <2 x i1> %mask) {
 define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z2.h, z2.h, #15
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i16:
@@ -587,13 +607,17 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) {
 define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) {
 ; CHECK-LABEL: select_v8i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z2.h, z2.b
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    uunpklo z2.h, z2.b
 ; CHECK-NEXT:    lsl z2.h, z2.h, #15
 ; CHECK-NEXT:    asr z2.h, z2.h, #15
 ; CHECK-NEXT:    and z2.h, z2.h, #0x1
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z2.h, #0
 ; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v8i16:
@@ -784,12 +808,16 @@ define void @select_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z2.s, z2.s, #31
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i32:
@@ -821,13 +849,17 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) {
 define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) {
 ; CHECK-LABEL: select_v4i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    uunpklo z2.s, z2.h
 ; CHECK-NEXT:    lsl z2.s, z2.s, #31
 ; CHECK-NEXT:    asr z2.s, z2.s, #31
 ; CHECK-NEXT:    and z2.s, z2.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
 ; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v4i32:
@@ -935,11 +967,15 @@ define void @select_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) {
 ; CHECK-LABEL: select_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x8, x0, #0x1
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z2.d, x8
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v1i64:
@@ -961,13 +997,17 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) {
 define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) {
 ; CHECK-LABEL: select_v2i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
+; CHECK-NEXT:    uunpklo z2.d, z2.s
 ; CHECK-NEXT:    lsl z2.d, z2.d, #63
 ; CHECK-NEXT:    asr z2.d, z2.d, #63
 ; CHECK-NEXT:    and z2.d, z2.d, #0x1
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: select_v2i64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index ea49e49272a782..613543310f2c31 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -19,8 +19,8 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind {
 ; CHECK-NEXT:    ptrue p0.b, vl2
 ; CHECK-NEXT:    ld2b { z0.b, z1.b }, p0/z, [x20]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    mov z1.b, z0.b[1]
-; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    mov z2.b, z0.b[1]
+; CHECK-NEXT:    zip1 z0.s, z0.s, z2.s
 ; CHECK-NEXT:    st1b { z0.s }, p0, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
@@ -189,9 +189,9 @@ define void @alloc_v8f64(ptr %st_ptr) nounwind {
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov x8, #4 // =0x4
 ; CHECK-NEXT:    ld2d { z0.d, z1.d }, p0/z, [x20]
-; CHECK-NEXT:    ld2d { z1.d, z2.d }, p0/z, [x20, x8, lsl #3]
+; CHECK-NEXT:    ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3]
 ; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT:    stp q0, q1, [x19]
+; CHECK-NEXT:    stp q0, q2, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
index 43767c9eeaa34a..3627390b5edfa9 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll
@@ -15,6 +15,7 @@ define <4 x i32> @test(ptr %arg1, ptr %arg2) {
 ; CHECK-NEXT:    mov z0.s, z1.s[2]
 ; CHECK-NEXT:    add z1.s, z3.s, z3.s
 ; CHECK-NEXT:    add z3.s, z4.s, z4.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    stp q2, q5, [x0, #32]
 ; CHECK-NEXT:    stp q1, q3, [x0]
 ; CHECK-NEXT:    ret
@@ -106,6 +107,7 @@ define <2 x i32> @test2(ptr %arg1, ptr %arg2) {
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    stp q1, q2, [x0, #32]
 ; CHECK-NEXT:    stp q3, q4, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test2:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
index 38d22c1c90ff7b..504db6df18ee5a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
@@ -10,6 +10,7 @@ define <4 x i8> @load_v4i8(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_v4i8:
@@ -78,6 +79,7 @@ define <2 x i16> @load_v2i16(ptr %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: load_v2i16:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
index 1184d4415d0fb2..d4565c4b69c779 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll
@@ -14,6 +14,7 @@ define i8 @andv_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: andv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -40,6 +41,7 @@ define i8 @andv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: andv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -74,6 +76,7 @@ define i8 @andv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: andv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -208,6 +211,7 @@ define i16 @andv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: andv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -229,6 +233,7 @@ define i16 @andv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: andv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -255,6 +260,7 @@ define i16 @andv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: andv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -341,6 +347,7 @@ define i32 @andv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: andv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -362,6 +369,7 @@ define i32 @andv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: andv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -417,6 +425,7 @@ define i64 @andv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: andv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    andv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -466,6 +475,7 @@ define i8 @eorv_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: eorv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -492,6 +502,7 @@ define i8 @eorv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: eorv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -526,6 +537,7 @@ define i8 @eorv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: eorv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -660,6 +672,7 @@ define i16 @eorv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: eorv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -681,6 +694,7 @@ define i16 @eorv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: eorv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -707,6 +721,7 @@ define i16 @eorv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: eorv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -793,6 +808,7 @@ define i32 @eorv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: eorv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -814,6 +830,7 @@ define i32 @eorv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: eorv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -869,6 +886,7 @@ define i64 @eorv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: eorv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    eorv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
@@ -918,6 +936,7 @@ define i8 @orv_v4i8(<4 x i8> %a) {
 ; CHECK-LABEL: orv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -944,6 +963,7 @@ define i8 @orv_v8i8(<8 x i8> %a) {
 ; CHECK-LABEL: orv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -978,6 +998,7 @@ define i8 @orv_v16i8(<16 x i8> %a) {
 ; CHECK-LABEL: orv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1112,6 +1133,7 @@ define i16 @orv_v2i16(<2 x i16> %a) {
 ; CHECK-LABEL: orv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1133,6 +1155,7 @@ define i16 @orv_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: orv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1159,6 +1182,7 @@ define i16 @orv_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: orv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv h0, p0, z0.h
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1245,6 +1269,7 @@ define i32 @orv_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: orv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1266,6 +1291,7 @@ define i32 @orv_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: orv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv s0, p0, z0.s
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
@@ -1321,6 +1347,7 @@ define i64 @orv_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: orv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    orv d0, p0, z0.d
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
index 1dca9880925709..c8cea6ebabd484 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-gather-scatter.ll
@@ -43,6 +43,7 @@ define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
 ; CHECK-NEXT:    ldr x8, [x8]
 ; CHECK-NEXT:    mov z0.d, p0/m, x8
 ; CHECK-NEXT:  .LBB0_4: // %else2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
 ;

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
index d66164b146b6c2..48a642c908bfe6 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll
@@ -12,11 +12,13 @@ target triple = "aarch64-unknown-linux-gnu"
 define <4 x i8> @masked_load_v4i8(ptr %src, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.h, z0.h, #15
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v4i8:
@@ -106,11 +108,13 @@ define <4 x i8> @masked_load_v4i8(ptr %src, <4 x i1> %mask) {
 define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.b, z0.b, #7
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    lsl z0.b, z0.b, #7
 ; CHECK-NEXT:    asr z0.b, z0.b, #7
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v8i8:
@@ -299,11 +303,13 @@ define <8 x i8> @masked_load_v8i8(ptr %src, <8 x i1> %mask) {
 define <16 x i8> @masked_load_v16i8(ptr %src, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.b, z0.b, #7
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    lsl z0.b, z0.b, #7
 ; CHECK-NEXT:    asr z0.b, z0.b, #7
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v16i8:
@@ -740,6 +746,8 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) {
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
 ; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT:    ld1b { z1.b }, p1/z, [x0, x8]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    add sp, sp, #32
 ; CHECK-NEXT:    ret
 ;
@@ -1458,6 +1466,7 @@ define <32 x i8> @masked_load_v32i8(ptr %src, <32 x i1> %mask) {
 define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmov s1, wzr
 ; CHECK-NEXT:    mov z2.s, z0.s[1]
 ; CHECK-NEXT:    ptrue p0.h, vl4
@@ -1468,6 +1477,7 @@ define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) {
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v2f16:
@@ -1510,11 +1520,13 @@ define <2 x half> @masked_load_v2f16(ptr %src, <2 x i1> %mask) {
 define <4 x half> @masked_load_v4f16(ptr %src, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.h, z0.h, #15
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v4f16:
@@ -1605,12 +1617,14 @@ define <4 x half> @masked_load_v4f16(ptr %src, <4 x i1> %mask) {
 define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v8f16:
@@ -1800,19 +1814,22 @@ define <8 x half> @masked_load_v8f16(ptr %src, <8 x i1> %mask) {
 define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v16f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    mov x8, #8 // =0x8
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    mov x8, #8 // =0x8
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    lsl z1.h, z1.h, #15
-; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z1.h, z1.h, #15
-; CHECK-NEXT:    asr z0.h, z0.h, #15
+; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p1.h, p0/z, z1.h, #0
+; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v16f16:
@@ -2150,11 +2167,13 @@ define <16 x half> @masked_load_v16f16(ptr %src, <16 x i1> %mask) {
 define <2 x float> @masked_load_v2f32(ptr %src, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.s, z0.s, #31
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    lsl z0.s, z0.s, #31
 ; CHECK-NEXT:    asr z0.s, z0.s, #31
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v2f32:
@@ -2199,12 +2218,14 @@ define <2 x float> @masked_load_v2f32(ptr %src, <2 x i1> %mask) {
 define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    lsl z0.s, z0.s, #31
 ; CHECK-NEXT:    asr z0.s, z0.s, #31
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v4f32:
@@ -2291,6 +2312,7 @@ define <4 x float> @masked_load_v4f32(ptr %src, <4 x i1> %mask) {
 define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v8f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.b, z0.b[3]
 ; CHECK-NEXT:    mov z2.b, z0.b[2]
 ; CHECK-NEXT:    mov x8, #4 // =0x4
@@ -2316,6 +2338,8 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) {
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
 ; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v8f32:
@@ -2492,12 +2516,14 @@ define <8 x float> @masked_load_v8f32(ptr %src, <8 x i1> %mask) {
 define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    lsl z0.d, z0.d, #63
 ; CHECK-NEXT:    asr z0.d, z0.d, #63
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v2f64:
@@ -2542,9 +2568,10 @@ define <2 x double> @masked_load_v2f64(ptr %src, <2 x i1> %mask) {
 define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_load_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov x8, #2 // =0x2
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s
@@ -2555,7 +2582,9 @@ define <4 x double> @masked_load_v4f64(ptr %src, <4 x i1> %mask) {
 ; CHECK-NEXT:    cmpne p1.d, p0/z, z1.d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_v4f64:
@@ -2651,6 +2680,7 @@ define <3 x i32> @masked_load_zext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_zext_v3i32:
@@ -2723,6 +2753,7 @@ define <3 x i32> @masked_load_sext_v3i32(ptr %load_ptr, <3 x i1> %pm) {
 ; CHECK-NEXT:    sunpklo z0.s, z0.h
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: masked_load_sext_v3i32:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
index 9b6f571c1e008d..265480b571970f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll
@@ -12,8 +12,9 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.h, z0.h, #15
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    mov z0.h, #0 // =0x0
@@ -67,8 +68,9 @@ define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) {
 define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.b, z0.b, #7
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    lsl z0.b, z0.b, #7
 ; CHECK-NEXT:    asr z0.b, z0.b, #7
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    mov z0.b, #0 // =0x0
@@ -159,8 +161,9 @@ define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) {
 define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v16i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.b, z0.b, #7
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    lsl z0.b, z0.b, #7
 ; CHECK-NEXT:    asr z0.b, z0.b, #7
 ; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    mov z0.b, #0 // =0x0
@@ -586,6 +589,7 @@ define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) {
 define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    fmov s1, wzr
 ; CHECK-NEXT:    mov z2.s, z0.s[1]
 ; CHECK-NEXT:    ptrue p0.h, vl4
@@ -630,8 +634,9 @@ define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) {
 define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    lsl z0.h, z0.h, #15
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    mov z0.h, #0 // =0x0
@@ -689,8 +694,9 @@ define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) {
 define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8f16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
@@ -790,14 +796,15 @@ define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
 define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v16f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
-; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT:    mov x8, #8 // =0x8
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    mov x8, #8 // =0x8
+; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.h, z0.b
 ; CHECK-NEXT:    lsl z1.h, z1.h, #15
-; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z1.h, z1.h, #15
+; CHECK-NEXT:    lsl z0.h, z0.h, #15
 ; CHECK-NEXT:    asr z0.h, z0.h, #15
 ; CHECK-NEXT:    cmpne p1.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z1.h, #0
@@ -945,8 +952,9 @@ define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
 define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    lsl z0.s, z0.s, #31
 ; CHECK-NEXT:    asr z0.s, z0.s, #31
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
@@ -1001,6 +1009,7 @@ define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
 define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v8f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z1.b, z0.b[7]
 ; CHECK-NEXT:    mov z2.b, z0.b[6]
 ; CHECK-NEXT:    mov x8, #4 // =0x4
@@ -1113,8 +1122,9 @@ define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) {
 define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v2f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.d, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    uunpklo z0.d, z0.s
 ; CHECK-NEXT:    lsl z0.d, z0.d, #63
 ; CHECK-NEXT:    asr z0.d, z0.d, #63
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
@@ -1151,9 +1161,10 @@ define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
 define void @masked_store_v4f64(ptr %dst, <4 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v4f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    mov x8, #2 // =0x2
+; CHECK-NEXT:    uunpklo z0.s, z0.h
 ; CHECK-NEXT:    uunpklo z1.d, z0.s
 ; CHECK-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT:    uunpklo z0.d, z0.s

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
index 86d00978a53404..a33e8537edf4ee 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
@@ -480,6 +480,7 @@ define <16 x i8> @test_revv16i8(ptr %a) {
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    ldr q0, [x0]
 ; CHECK-NEXT:    revb z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: test_revv16i8:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
index 6eb79f24386cf8..e07036f2a1acfc 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
@@ -28,9 +28,9 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) {
 ; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w8, s0
@@ -126,43 +126,43 @@ define i1 @ptest_or_v16i1(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldp q4, q5, [x0]
 ; CHECK-NEXT:    fcmne p1.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    ldp q1, q6, [x1]
-; CHECK-NEXT:    fcmne p2.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    fcmne p3.s, p0/z, z3.s, #0.0
+; CHECK-NEXT:    fcmne p2.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    fcmne p5.s, p0/z, z2.s, #0.0
 ; CHECK-NEXT:    fcmne p4.s, p0/z, z5.s, #0.0
 ; CHECK-NEXT:    fcmne p7.s, p0/z, z4.s, #0.0
 ; CHECK-NEXT:    fcmne p6.s, p0/z, z6.s, #0.0
 ; CHECK-NEXT:    fcmne p0.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z2.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z4.s, p5/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z3.s, p4/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z6.s, p7/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z5.s, p6/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z7.s, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    uzp1 z17.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z19.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z19.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z18.h, z6.h, z6.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
-; CHECK-NEXT:    splice z0.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z3.h, p0, { z3.h, z4.h }
+; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z18.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z3.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z0.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z2.h, z7.h, z7.h
+; CHECK-NEXT:    splice z4.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    splice z5.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z0.b, z3.b, z3.b
-; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z5.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    splice z1.b, p0, { z4.b, z5.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    orr z0.d, z2.d, z0.d
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
 ; CHECK-NEXT:    umaxv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
@@ -335,43 +335,43 @@ define i1 @ptest_and_v16i1(ptr %a, ptr %b) {
 ; CHECK-NEXT:    ldp q4, q5, [x0]
 ; CHECK-NEXT:    fcmne p1.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    ldp q1, q6, [x1]
-; CHECK-NEXT:    fcmne p2.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    fcmne p3.s, p0/z, z3.s, #0.0
+; CHECK-NEXT:    fcmne p2.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT:    fcmne p5.s, p0/z, z2.s, #0.0
 ; CHECK-NEXT:    fcmne p4.s, p0/z, z5.s, #0.0
 ; CHECK-NEXT:    fcmne p7.s, p0/z, z4.s, #0.0
 ; CHECK-NEXT:    fcmne p6.s, p0/z, z6.s, #0.0
 ; CHECK-NEXT:    fcmne p0.s, p0/z, z1.s, #0.0
 ; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z2.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z1.s, p2/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z4.s, p5/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z3.s, p4/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z6.s, p7/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z5.s, p6/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    mov z7.s, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:    uzp1 z17.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z19.h, z2.h, z2.h
 ; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z1.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z19.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z18.h, z6.h, z6.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
-; CHECK-NEXT:    splice z0.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z3.h, p0, { z3.h, z4.h }
+; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z18.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z3.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z0.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z2.h, z7.h, z7.h
+; CHECK-NEXT:    splice z4.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    splice z5.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT:    uzp1 z4.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z0.b, z3.b, z3.b
-; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z5.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    splice z1.b, p0, { z4.b, z5.b }
 ; CHECK-NEXT:    ptrue p0.b, vl16
-; CHECK-NEXT:    and z0.d, z2.d, z0.d
+; CHECK-NEXT:    and z0.d, z0.d, z1.d
 ; CHECK-NEXT:    uminv b0, p0, z0.b
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
index 019ab10d6d444f..00a15f4bcd6394 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
@@ -36,11 +36,13 @@ define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
 ;
 ; STREAMING-SVE-LABEL: reduce_uaddv_v16i8:
 ; STREAMING-SVE:       // %bb.0:
+; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; STREAMING-SVE-NEXT:    uunpklo z2.h, z1.b
-; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
+; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; STREAMING-SVE-NEXT:    uunpklo z3.h, z0.b
-; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
+; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
+; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; STREAMING-SVE-NEXT:    uunpklo z1.h, z1.b
 ; STREAMING-SVE-NEXT:    uunpklo z0.h, z0.b
 ; STREAMING-SVE-NEXT:    uunpklo z4.s, z2.h
@@ -48,9 +50,9 @@ define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
 ; STREAMING-SVE-NEXT:    uunpklo z6.s, z3.h
 ; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; STREAMING-SVE-NEXT:    mov z5.d, z1.d
-; STREAMING-SVE-NEXT:    uunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    uunpklo z7.s, z0.h
 ; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT:    uunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    uunpklo z3.s, z3.h
 ; STREAMING-SVE-NEXT:    add z4.s, z6.s, z4.s
 ; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z1.b, #8
@@ -65,6 +67,7 @@ define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
 ; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
 ; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
 ; STREAMING-SVE-NEXT:    fmov x0, d0
+; STREAMING-SVE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-SVE-NEXT:    ret
   %1 = zext <32 x i8> %a to <32 x i32>
   %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
@@ -101,11 +104,13 @@ define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
 ;
 ; STREAMING-SVE-LABEL: reduce_saddv_v16i8:
 ; STREAMING-SVE:       // %bb.0:
+; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; STREAMING-SVE-NEXT:    sunpklo z2.h, z1.b
-; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
+; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; STREAMING-SVE-NEXT:    sunpklo z3.h, z0.b
-; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
+; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
+; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
 ; STREAMING-SVE-NEXT:    sunpklo z1.h, z1.b
 ; STREAMING-SVE-NEXT:    sunpklo z0.h, z0.b
 ; STREAMING-SVE-NEXT:    sunpklo z4.s, z2.h
@@ -113,9 +118,9 @@ define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
 ; STREAMING-SVE-NEXT:    sunpklo z6.s, z3.h
 ; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z3.b, #8
 ; STREAMING-SVE-NEXT:    mov z5.d, z1.d
-; STREAMING-SVE-NEXT:    sunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    sunpklo z7.s, z0.h
 ; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT:    sunpklo z2.s, z2.h
 ; STREAMING-SVE-NEXT:    sunpklo z3.s, z3.h
 ; STREAMING-SVE-NEXT:    add z4.s, z6.s, z4.s
 ; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z1.b, #8
@@ -130,6 +135,7 @@ define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
 ; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
 ; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
 ; STREAMING-SVE-NEXT:    fmov x0, d0
+; STREAMING-SVE-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; STREAMING-SVE-NEXT:    ret
   %1 = sext <32 x i8> %a to <32 x i32>
   %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
index 381ae5a0b38414..c942f1eca8ebaf 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll
@@ -17,6 +17,7 @@ define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) {
 ; CHECK-NEXT:    zip1 z1.h, z2.h, z1.h
 ; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
 ; CHECK-NEXT:    zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
   %el0 = extractelement <vscale x 4 x i1> %a, i32 0
   %el1 = extractelement <vscale x 4 x i1> %a, i32 1

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index 5fe86db11b07a5..c34cae12516ed8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -14,8 +14,10 @@ define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) {
 ; CHECK-LABEL: bitreverse_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT:    lsr z0.h, z0.h, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v4i8:
@@ -49,7 +51,9 @@ define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) {
 ; CHECK-LABEL: bitreverse_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v8i8:
@@ -99,7 +103,9 @@ define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: bitreverse_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.b, p0/m, z0.b
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v16i8:
@@ -334,8 +340,10 @@ define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: bitreverse_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT:    lsr z0.s, z0.s, #16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v2i16:
@@ -360,7 +368,9 @@ define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: bitreverse_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v4i16:
@@ -394,7 +404,9 @@ define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: bitreverse_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v8i16:
@@ -533,7 +545,9 @@ define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: bitreverse_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v2i32:
@@ -556,7 +570,9 @@ define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: bitreverse_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v4i32:
@@ -629,7 +645,9 @@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: bitreverse_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v1i64:
@@ -650,7 +668,9 @@ define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: bitreverse_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    rbit z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bitreverse_v2i64:
@@ -712,8 +732,10 @@ define <2 x i16> @bswap_v2i16(<2 x i16> %op) {
 ; CHECK-LABEL: bswap_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.s, p0/m, z0.s
 ; CHECK-NEXT:    lsr z0.s, z0.s, #16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v2i16:
@@ -752,7 +774,9 @@ define <4 x i16> @bswap_v4i16(<4 x i16> %op) {
 ; CHECK-LABEL: bswap_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v4i16:
@@ -786,7 +810,9 @@ define <8 x i16> @bswap_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: bswap_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    revb z0.h, p0/m, z0.h
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v8i16:
@@ -928,7 +954,9 @@ define <2 x i32> @bswap_v2i32(<2 x i32> %op) {
 ; CHECK-LABEL: bswap_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v2i32:
@@ -962,7 +990,9 @@ define <4 x i32> @bswap_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: bswap_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    revb z0.s, p0/m, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v4i32:
@@ -1104,7 +1134,9 @@ define <1 x i64> @bswap_v1i64(<1 x i64> %op) {
 ; CHECK-LABEL: bswap_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revb z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v1i64:
@@ -1138,7 +1170,9 @@ define <2 x i64> @bswap_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: bswap_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    revb z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: bswap_v2i64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
index dd7d2264188f46..85ba9640002347 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
@@ -10,8 +10,10 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1) {
 ; CHECK-LABEL: sdiv_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i8:
@@ -57,7 +59,9 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i8:
@@ -123,7 +127,9 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl16
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v16i8:
@@ -454,8 +460,10 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1) {
 ; CHECK-LABEL: sdiv_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i16:
@@ -483,7 +491,9 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i16:
@@ -525,7 +535,9 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.h, vl8
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v8i16:
@@ -712,7 +724,9 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i32:
@@ -739,7 +753,9 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v4i32:
@@ -836,7 +852,9 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v1i64:
@@ -860,7 +878,9 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: sdiv_v2i64:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
index 27ae572206232e..c7b2575266d65b 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
@@ -9,8 +9,9 @@ target triple = "aarch64-unknown-linux-gnu"
 define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) {
 ; CHECK-LABEL: hang_when_merging_stores_after_legalisation:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    mov z1.d, z0.d
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
@@ -37,6 +38,8 @@ define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2
 ; CHECK-LABEL: interleave_store_without_splat:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2w { z0.s, z1.s }, p0, [x0]
 ; CHECK-NEXT:    ret
 ;
@@ -72,12 +75,13 @@ define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2)
 ; CHECK-LABEL: interleave_store_legalization:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z0.d
-; CHECK-NEXT:    mov x8, #8 // =0x8
-; CHECK-NEXT:    mov z2.d, z3.d
+; CHECK-NEXT:    // kill: def $q3 killed $q3 def $z2_z3
 ; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    mov x8, #8 // =0x8
+; CHECK-NEXT:    mov z4.d, z0.d
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st2w { z4.s, z5.s }, p0, [x0]
-; CHECK-NEXT:    st2w { z1.s, z2.s }, p0, [x0, x8, lsl #2]
+; CHECK-NEXT:    st2w { z2.s, z3.s }, p0, [x0, x8, lsl #2]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: interleave_store_legalization:

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
index 071ab0bc1a7132..a4cf5d608fed6d 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
@@ -14,6 +14,7 @@ define <4 x i8> @splat_v4i8(i8 %a) {
 ; CHECK-LABEL: splat_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v4i8:
@@ -36,6 +37,7 @@ define <8 x i8> @splat_v8i8(i8 %a) {
 ; CHECK-LABEL: splat_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.b, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v8i8:
@@ -62,6 +64,7 @@ define <16 x i8> @splat_v16i8(i8 %a) {
 ; CHECK-LABEL: splat_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.b, w0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v16i8:
@@ -132,6 +135,7 @@ define <2 x i16> @splat_v2i16(i16 %a) {
 ; CHECK-LABEL: splat_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2i16:
@@ -151,6 +155,7 @@ define <4 x i16> @splat_v4i16(i16 %a) {
 ; CHECK-LABEL: splat_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v4i16:
@@ -173,6 +178,7 @@ define <8 x i16> @splat_v8i16(i16 %a) {
 ; CHECK-LABEL: splat_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.h, w0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v8i16:
@@ -227,6 +233,7 @@ define <2 x i32> @splat_v2i32(i32 %a) {
 ; CHECK-LABEL: splat_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2i32:
@@ -246,6 +253,7 @@ define <4 x i32> @splat_v4i32(i32 %a) {
 ; CHECK-LABEL: splat_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.s, w0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v4i32:
@@ -288,6 +296,7 @@ define <1 x i64> @splat_v1i64(i64 %a) {
 ; CHECK-LABEL: splat_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v1i64:
@@ -307,6 +316,7 @@ define <2 x i64> @splat_v2i64(i64 %a) {
 ; CHECK-LABEL: splat_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2i64:
@@ -348,7 +358,9 @@ define void @splat_v4i64(i64 %a, ptr %b) {
 define <2 x half> @splat_v2f16(half %a) {
 ; CHECK-LABEL: splat_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2f16:
@@ -368,7 +380,9 @@ define <2 x half> @splat_v2f16(half %a) {
 define <4 x half> @splat_v4f16(half %a) {
 ; CHECK-LABEL: splat_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v4f16:
@@ -390,7 +404,9 @@ define <4 x half> @splat_v4f16(half %a) {
 define <8 x half> @splat_v8f16(half %a) {
 ; CHECK-LABEL: splat_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v8f16:
@@ -415,6 +431,7 @@ define <8 x half> @splat_v8f16(half %a) {
 define void @splat_v16f16(half %a, ptr %b) {
 ; CHECK-LABEL: splat_v16f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    stp q0, q0, [x0]
 ; CHECK-NEXT:    ret
@@ -444,7 +461,9 @@ define void @splat_v16f16(half %a, ptr %b) {
 define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) {
 ; CHECK-LABEL: splat_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2f32:
@@ -463,7 +482,9 @@ define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) {
 define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) {
 ; CHECK-LABEL: splat_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v4f32:
@@ -482,6 +503,7 @@ define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) {
 define void @splat_v8f32(float %a, ptr %b) {
 ; CHECK-LABEL: splat_v8f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    stp q0, q0, [x0]
 ; CHECK-NEXT:    ret
@@ -523,7 +545,9 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) {
 define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) {
 ; CHECK-LABEL: splat_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: splat_v2f64:
@@ -540,6 +564,7 @@ define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) {
 define void @splat_v4f64(double %a, ptr %b) {
 ; CHECK-LABEL: splat_v4f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    stp q0, q0, [x0]
 ; CHECK-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
index 409a42e8d478fd..13fcd94ea8a260 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
@@ -129,8 +129,8 @@ define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) {
 define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) {
 ; CHECK-LABEL: store_trunc_v2i256i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d, vl1
 ; CHECK-NEXT:    ldr d1, [x0, #32]
+; CHECK-NEXT:    ptrue p0.d, vl1
 ; CHECK-NEXT:    ldr d0, [x0]
 ; CHECK-NEXT:    splice z0.d, p0, { z0.d, z1.d }
 ; CHECK-NEXT:    str q0, [x1]

diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
index 03f377a5986871..9d241f6f927e11 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
@@ -15,9 +15,10 @@ define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
+; CHECK-NEXT:    uzp1 z3.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z2.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v16i16_v16i8:
@@ -222,16 +223,16 @@ define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    uzp1 z7.b, z0.b, z0.b
 ; CHECK-NEXT:    uzp1 z6.b, z1.b, z1.b
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
-; CHECK-NEXT:    uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z5.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z17.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z16.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z17.b, z3.b, z3.b
+; CHECK-NEXT:    uzp1 z16.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z3.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z2.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
 ; CHECK-NEXT:    splice z0.b, p0, { z6.b, z7.b }
-; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    splice z3.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT:    splice z1.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    splice z3.b, p0, { z4.b, z5.b }
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
 ; CHECK-NEXT:    add z2.b, z2.b, z2.b
@@ -526,49 +527,49 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #192]
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    ldp q16, q17, [x0, #224]
-; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0]
-; CHECK-NEXT:    uzp1 z3.b, z3.b, z3.b
 ; CHECK-NEXT:    ldp q6, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    ldp q18, q19, [x0, #128]
-; CHECK-NEXT:    uzp1 z17.b, z17.b, z17.b
-; CHECK-NEXT:    ldp q20, q21, [x0, #160]
-; CHECK-NEXT:    uzp1 z16.b, z16.b, z16.b
-; CHECK-NEXT:    ldp q22, q23, [x0, #96]
-; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    uzp1 z17.b, z3.b, z3.b
+; CHECK-NEXT:    ldp q3, q18, [x0, #224]
+; CHECK-NEXT:    uzp1 z16.b, z2.b, z2.b
+; CHECK-NEXT:    ldp q2, q19, [x0, #128]
+; CHECK-NEXT:    ldp q0, q1, [x0, #32]
+; CHECK-NEXT:    uzp1 z21.b, z18.b, z18.b
+; CHECK-NEXT:    ldp q18, q22, [x0, #160]
+; CHECK-NEXT:    uzp1 z20.b, z3.b, z3.b
+; CHECK-NEXT:    uzp1 z24.b, z19.b, z19.b
+; CHECK-NEXT:    ldp q3, q19, [x0, #96]
+; CHECK-NEXT:    uzp1 z23.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z26.b, z22.b, z22.b
+; CHECK-NEXT:    splice z2.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    uzp1 z17.b, z7.b, z7.b
+; CHECK-NEXT:    uzp1 z25.b, z18.b, z18.b
+; CHECK-NEXT:    splice z7.b, p0, { z20.b, z21.b }
+; CHECK-NEXT:    uzp1 z21.b, z5.b, z5.b
 ; CHECK-NEXT:    uzp1 z19.b, z19.b, z19.b
-; CHECK-NEXT:    uzp1 z18.b, z18.b, z18.b
-; CHECK-NEXT:    uzp1 z7.b, z7.b, z7.b
-; CHECK-NEXT:    uzp1 z21.b, z21.b, z21.b
-; CHECK-NEXT:    uzp1 z20.b, z20.b, z20.b
-; CHECK-NEXT:    splice z3.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    uzp1 z6.b, z6.b, z6.b
-; CHECK-NEXT:    uzp1 z17.b, z23.b, z23.b
-; CHECK-NEXT:    uzp1 z16.b, z22.b, z22.b
-; CHECK-NEXT:    uzp1 z5.b, z5.b, z5.b
-; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    splice z18.b, p0, { z18.b, z19.b }
-; CHECK-NEXT:    splice z19.b, p0, { z20.b, z21.b }
-; CHECK-NEXT:    splice z6.b, p0, { z6.b, z7.b }
-; CHECK-NEXT:    splice z7.b, p0, { z16.b, z17.b }
-; CHECK-NEXT:    add z2.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z20.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z5.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z16.b, z6.b, z6.b
+; CHECK-NEXT:    splice z6.b, p0, { z23.b, z24.b }
+; CHECK-NEXT:    uzp1 z18.b, z3.b, z3.b
+; CHECK-NEXT:    splice z3.b, p0, { z25.b, z26.b }
+; CHECK-NEXT:    uzp1 z4.b, z0.b, z0.b
+; CHECK-NEXT:    add z0.b, z2.b, z2.b
+; CHECK-NEXT:    add z7.b, z7.b, z7.b
+; CHECK-NEXT:    splice z1.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    splice z2.b, p0, { z18.b, z19.b }
+; CHECK-NEXT:    splice z16.b, p0, { z20.b, z21.b }
 ; CHECK-NEXT:    splice z4.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    add z6.b, z6.b, z6.b
 ; CHECK-NEXT:    add z3.b, z3.b, z3.b
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
-; CHECK-NEXT:    add z5.b, z18.b, z18.b
-; CHECK-NEXT:    add z1.b, z19.b, z19.b
-; CHECK-NEXT:    stp q2, q3, [x1, #96]
-; CHECK-NEXT:    add z2.b, z6.b, z6.b
-; CHECK-NEXT:    add z3.b, z7.b, z7.b
-; CHECK-NEXT:    add z4.b, z4.b, z4.b
-; CHECK-NEXT:    add z0.b, z0.b, z0.b
-; CHECK-NEXT:    stp q5, q1, [x1, #64]
-; CHECK-NEXT:    stp q2, q3, [x1, #32]
-; CHECK-NEXT:    stp q4, q0, [x1]
+; CHECK-NEXT:    stp q0, q7, [x1, #96]
+; CHECK-NEXT:    add z0.b, z1.b, z1.b
+; CHECK-NEXT:    add z1.b, z2.b, z2.b
+; CHECK-NEXT:    add z2.b, z16.b, z16.b
+; CHECK-NEXT:    stp q6, q3, [x1, #64]
+; CHECK-NEXT:    add z3.b, z4.b, z4.b
+; CHECK-NEXT:    stp q0, q1, [x1, #32]
+; CHECK-NEXT:    stp q2, q3, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8:
@@ -1182,10 +1183,11 @@ define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z2.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i32_v8i8:
@@ -1225,9 +1227,10 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind {
 ; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v16i32_v16i8:
@@ -1279,25 +1282,25 @@ define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #64]
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z17.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z16.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z19.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z18.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
+; CHECK-NEXT:    splice z4.h, p0, { z16.h, z17.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    splice z1.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z3.h, p0, { z6.h, z7.h }
+; CHECK-NEXT:    splice z5.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
-; CHECK-NEXT:    uzp1 z1.b, z3.b, z3.b
-; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
-; CHECK-NEXT:    splice z1.b, p0, { z1.b, z2.b }
+; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT:    uzp1 z7.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z2.b, z5.b, z5.b
+; CHECK-NEXT:    uzp1 z6.b, z1.b, z1.b
+; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    splice z1.b, p0, { z6.b, z7.b }
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
 ; CHECK-NEXT:    stp q1, q0, [x1]
@@ -1426,56 +1429,56 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #160]
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    ldp q16, q17, [x0, #128]
-; CHECK-NEXT:    ldp q0, q1, [x0, #64]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #96]
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    ldp q18, q19, [x0, #192]
-; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
-; CHECK-NEXT:    ldp q20, q21, [x0, #224]
-; CHECK-NEXT:    uzp1 z16.h, z16.h, z16.h
-; CHECK-NEXT:    ldp q22, q23, [x0, #32]
+; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
+; CHECK-NEXT:    ldp q3, q18, [x0, #128]
+; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
+; CHECK-NEXT:    ldp q2, q19, [x0, #192]
+; CHECK-NEXT:    ldp q0, q1, [x0, #64]
+; CHECK-NEXT:    uzp1 z21.h, z18.h, z18.h
+; CHECK-NEXT:    ldp q18, q22, [x0, #224]
+; CHECK-NEXT:    uzp1 z20.h, z3.h, z3.h
+; CHECK-NEXT:    ldp q3, q23, [x0, #32]
+; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z27.h, z19.h, z19.h
+; CHECK-NEXT:    uzp1 z25.h, z22.h, z22.h
+; CHECK-NEXT:    uzp1 z26.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z24.h, z18.h, z18.h
+; CHECK-NEXT:    uzp1 z18.h, z23.h, z23.h
+; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z22.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
+; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT:    splice z1.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    splice z6.h, p0, { z24.h, z25.h }
+; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z26.h, z27.h }
+; CHECK-NEXT:    splice z7.h, p0, { z17.h, z18.h }
+; CHECK-NEXT:    uzp1 z17.b, z16.b, z16.b
 ; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
-; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
-; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z21.h, z21.h, z21.h
-; CHECK-NEXT:    uzp1 z20.h, z20.h, z20.h
-; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z23.h, z23.h, z23.h
-; CHECK-NEXT:    uzp1 z22.h, z22.h, z22.h
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT:    splice z3.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    splice z16.h, p0, { z20.h, z21.h }
-; CHECK-NEXT:    splice z17.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z18.h, p0, { z22.h, z23.h }
-; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    splice z3.h, p0, { z22.h, z23.h }
 ; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    uzp1 z1.b, z3.b, z3.b
+; CHECK-NEXT:    uzp1 z16.b, z1.b, z1.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z20.b, z16.b, z16.b
-; CHECK-NEXT:    uzp1 z19.b, z17.b, z17.b
-; CHECK-NEXT:    uzp1 z7.b, z18.b, z18.b
 ; CHECK-NEXT:    uzp1 z6.b, z6.b, z6.b
-; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
-; CHECK-NEXT:    splice z1.b, p0, { z1.b, z2.b }
-; CHECK-NEXT:    uzp1 z2.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z19.b, z20.b }
-; CHECK-NEXT:    splice z4.b, p0, { z6.b, z7.b }
-; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
-; CHECK-NEXT:    add z1.b, z1.b, z1.b
-; CHECK-NEXT:    add z0.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
+; CHECK-NEXT:    uzp1 z1.b, z7.b, z7.b
+; CHECK-NEXT:    uzp1 z0.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z3.b, z3.b, z3.b
+; CHECK-NEXT:    splice z7.b, p0, { z16.b, z17.b }
+; CHECK-NEXT:    uzp1 z2.b, z4.b, z4.b
+; CHECK-NEXT:    splice z4.b, p0, { z5.b, z6.b }
+; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
+; CHECK-NEXT:    add z2.b, z7.b, z7.b
 ; CHECK-NEXT:    add z3.b, z4.b, z4.b
-; CHECK-NEXT:    add z2.b, z2.b, z2.b
-; CHECK-NEXT:    stp q1, q0, [x1, #32]
-; CHECK-NEXT:    stp q3, q2, [x1]
+; CHECK-NEXT:    add z0.b, z0.b, z0.b
+; CHECK-NEXT:    add z1.b, z1.b, z1.b
+; CHECK-NEXT:    stp q2, q3, [x1, #32]
+; CHECK-NEXT:    stp q0, q1, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8:
@@ -1764,9 +1767,10 @@ define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z2.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i32_v8i16:
@@ -1880,16 +1884,16 @@ define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    uzp1 z7.h, z0.h, z0.h
 ; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z17.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z3.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z2.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
 ; CHECK-NEXT:    splice z0.h, p0, { z6.h, z7.h }
-; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z3.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT:    splice z1.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    splice z3.h, p0, { z4.h, z5.h }
 ; CHECK-NEXT:    add z0.h, z0.h, z0.h
 ; CHECK-NEXT:    add z1.h, z1.h, z1.h
 ; CHECK-NEXT:    add z2.h, z2.h, z2.h
@@ -2023,49 +2027,49 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #192]
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    ldp q16, q17, [x0, #224]
-; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0]
-; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
 ; CHECK-NEXT:    ldp q6, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    ldp q18, q19, [x0, #128]
-; CHECK-NEXT:    uzp1 z17.h, z17.h, z17.h
-; CHECK-NEXT:    ldp q20, q21, [x0, #160]
-; CHECK-NEXT:    uzp1 z16.h, z16.h, z16.h
-; CHECK-NEXT:    ldp q22, q23, [x0, #96]
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
+; CHECK-NEXT:    ldp q3, q18, [x0, #224]
+; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
+; CHECK-NEXT:    ldp q2, q19, [x0, #128]
+; CHECK-NEXT:    ldp q0, q1, [x0, #32]
+; CHECK-NEXT:    uzp1 z21.h, z18.h, z18.h
+; CHECK-NEXT:    ldp q18, q22, [x0, #160]
+; CHECK-NEXT:    uzp1 z20.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z24.h, z19.h, z19.h
+; CHECK-NEXT:    ldp q3, q19, [x0, #96]
+; CHECK-NEXT:    uzp1 z23.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z26.h, z22.h, z22.h
+; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z25.h, z18.h, z18.h
+; CHECK-NEXT:    splice z7.h, p0, { z20.h, z21.h }
+; CHECK-NEXT:    uzp1 z21.h, z5.h, z5.h
 ; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
-; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
-; CHECK-NEXT:    uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT:    uzp1 z21.h, z21.h, z21.h
-; CHECK-NEXT:    uzp1 z20.h, z20.h, z20.h
-; CHECK-NEXT:    splice z3.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z17.h, z23.h, z23.h
-; CHECK-NEXT:    uzp1 z16.h, z22.h, z22.h
-; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    splice z18.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z19.h, p0, { z20.h, z21.h }
-; CHECK-NEXT:    splice z6.h, p0, { z6.h, z7.h }
-; CHECK-NEXT:    splice z7.h, p0, { z16.h, z17.h }
-; CHECK-NEXT:    add z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z20.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z16.h, z6.h, z6.h
+; CHECK-NEXT:    splice z6.h, p0, { z23.h, z24.h }
+; CHECK-NEXT:    uzp1 z18.h, z3.h, z3.h
+; CHECK-NEXT:    splice z3.h, p0, { z25.h, z26.h }
+; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
+; CHECK-NEXT:    add z0.h, z2.h, z2.h
+; CHECK-NEXT:    add z7.h, z7.h, z7.h
+; CHECK-NEXT:    splice z1.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z16.h, p0, { z20.h, z21.h }
 ; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    add z6.h, z6.h, z6.h
 ; CHECK-NEXT:    add z3.h, z3.h, z3.h
-; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
-; CHECK-NEXT:    add z5.h, z18.h, z18.h
-; CHECK-NEXT:    add z1.h, z19.h, z19.h
-; CHECK-NEXT:    stp q2, q3, [x1, #96]
-; CHECK-NEXT:    add z2.h, z6.h, z6.h
-; CHECK-NEXT:    add z3.h, z7.h, z7.h
-; CHECK-NEXT:    add z4.h, z4.h, z4.h
-; CHECK-NEXT:    add z0.h, z0.h, z0.h
-; CHECK-NEXT:    stp q5, q1, [x1, #64]
-; CHECK-NEXT:    stp q2, q3, [x1, #32]
-; CHECK-NEXT:    stp q4, q0, [x1]
+; CHECK-NEXT:    stp q0, q7, [x1, #96]
+; CHECK-NEXT:    add z0.h, z1.h, z1.h
+; CHECK-NEXT:    add z1.h, z2.h, z2.h
+; CHECK-NEXT:    add z2.h, z16.h, z16.h
+; CHECK-NEXT:    stp q6, q3, [x1, #64]
+; CHECK-NEXT:    add z3.h, z4.h, z4.h
+; CHECK-NEXT:    stp q0, q1, [x1, #32]
+; CHECK-NEXT:    stp q2, q3, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v64i32_v64i16:
@@ -2358,10 +2362,11 @@ define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    uzp1 z2.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT:    splice z0.s, p0, { z1.s, z2.s }
+; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
+; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v4i64_v4i8:
@@ -2395,10 +2400,11 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind {
 ; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
 ; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i64_v8i8:
@@ -2438,29 +2444,30 @@ define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind {
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #64]
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT:    uzp1 z17.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z16.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z19.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z1.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z18.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z0.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
+; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
+; CHECK-NEXT:    splice z1.s, p0, { z18.s, z19.s }
 ; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    splice z1.s, p0, { z4.s, z5.s }
-; CHECK-NEXT:    splice z3.s, p0, { z6.s, z7.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z0.h, z3.h, z3.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
+; CHECK-NEXT:    splice z2.h, p0, { z3.h, z4.h }
 ; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
+; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
+; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v16i64_v16i8:
@@ -2518,57 +2525,57 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q5, q6, [x0, #224]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    ldp q0, q1, [x0]
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT:    ldp q16, q17, [x0, #192]
-; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT:    ldp q18, q19, [x0, #128]
-; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT:    ldp q20, q21, [x0, #160]
-; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT:    ldp q22, q23, [x0, #96]
-; CHECK-NEXT:    uzp1 z17.s, z17.s, z17.s
-; CHECK-NEXT:    uzp1 z16.s, z16.s, z16.s
+; CHECK-NEXT:    uzp1 z17.s, z6.s, z6.s
+; CHECK-NEXT:    ldp q6, q18, [x0, #192]
+; CHECK-NEXT:    uzp1 z16.s, z5.s, z5.s
+; CHECK-NEXT:    ldp q5, q19, [x0, #128]
+; CHECK-NEXT:    ldp q0, q1, [x0]
+; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
+; CHECK-NEXT:    ldp q18, q22, [x0, #160]
+; CHECK-NEXT:    uzp1 z20.s, z6.s, z6.s
+; CHECK-NEXT:    ldp q6, q23, [x0, #96]
+; CHECK-NEXT:    splice z16.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z27.s, z19.s, z19.s
+; CHECK-NEXT:    uzp1 z25.s, z22.s, z22.s
+; CHECK-NEXT:    uzp1 z26.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z24.s, z18.s, z18.s
+; CHECK-NEXT:    uzp1 z18.s, z23.s, z23.s
+; CHECK-NEXT:    uzp1 z23.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z17.s, z6.s, z6.s
+; CHECK-NEXT:    uzp1 z6.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z22.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z5.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
+; CHECK-NEXT:    splice z3.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
+; CHECK-NEXT:    splice z0.s, p0, { z24.s, z25.s }
+; CHECK-NEXT:    splice z7.s, p0, { z26.s, z27.s }
+; CHECK-NEXT:    splice z4.s, p0, { z17.s, z18.s }
+; CHECK-NEXT:    uzp1 z17.h, z16.h, z16.h
 ; CHECK-NEXT:    splice z5.s, p0, { z5.s, z6.s }
-; CHECK-NEXT:    uzp1 z19.s, z19.s, z19.s
-; CHECK-NEXT:    uzp1 z21.s, z21.s, z21.s
-; CHECK-NEXT:    uzp1 z20.s, z20.s, z20.s
-; CHECK-NEXT:    uzp1 z18.s, z18.s, z18.s
-; CHECK-NEXT:    uzp1 z23.s, z23.s, z23.s
-; CHECK-NEXT:    uzp1 z22.s, z22.s, z22.s
-; CHECK-NEXT:    uzp1 z6.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    splice z16.s, p0, { z20.s, z21.s }
-; CHECK-NEXT:    splice z18.s, p0, { z18.s, z19.s }
-; CHECK-NEXT:    splice z17.s, p0, { z22.s, z23.s }
-; CHECK-NEXT:    splice z6.s, p0, { z6.s, z7.s }
-; CHECK-NEXT:    uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
+; CHECK-NEXT:    splice z6.s, p0, { z22.s, z23.s }
+; CHECK-NEXT:    splice z1.s, p0, { z1.s, z2.s }
+; CHECK-NEXT:    uzp1 z16.h, z3.h, z3.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT:    uzp1 z19.h, z16.h, z16.h
-; CHECK-NEXT:    uzp1 z18.h, z18.h, z18.h
-; CHECK-NEXT:    uzp1 z7.h, z17.h, z17.h
-; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z3.h, p0, { z18.h, z19.h }
-; CHECK-NEXT:    splice z4.h, p0, { z6.h, z7.h }
-; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z19.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z2.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z18.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
+; CHECK-NEXT:    splice z0.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
+; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
+; CHECK-NEXT:    splice z3.h, p0, { z4.h, z5.h }
+; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
 ; CHECK-NEXT:    ptrue p0.b, vl8
-; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT:    uzp1 z1.b, z3.b, z3.b
-; CHECK-NEXT:    uzp1 z4.b, z4.b, z4.b
-; CHECK-NEXT:    uzp1 z3.b, z0.b, z0.b
-; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
-; CHECK-NEXT:    splice z1.b, p0, { z3.b, z4.b }
+; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
+; CHECK-NEXT:    uzp1 z7.b, z2.b, z2.b
+; CHECK-NEXT:    uzp1 z6.b, z3.b, z3.b
+; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
+; CHECK-NEXT:    splice z1.b, p0, { z6.b, z7.b }
 ; CHECK-NEXT:    add z0.b, z0.b, z0.b
 ; CHECK-NEXT:    add z1.b, z1.b, z1.b
 ; CHECK-NEXT:    stp q1, q0, [x1]
@@ -2726,10 +2733,11 @@ define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    uzp1 z2.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT:    splice z0.s, p0, { z1.s, z2.s }
+; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
+; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
 ; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v4i64_v4i16:
@@ -2763,9 +2771,10 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind {
 ; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
+; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v8i64_v8i16:
@@ -2806,25 +2815,25 @@ define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    ldp q2, q3, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #64]
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT:    uzp1 z17.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z16.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z1.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z19.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z0.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z18.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
+; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
 ; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    splice z1.s, p0, { z4.s, z5.s }
-; CHECK-NEXT:    splice z3.s, p0, { z6.s, z7.s }
+; CHECK-NEXT:    splice z5.s, p0, { z18.s, z19.s }
+; CHECK-NEXT:    splice z1.s, p0, { z2.s, z3.s }
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
-; CHECK-NEXT:    splice z0.h, p0, { z4.h, z5.h }
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
+; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
+; CHECK-NEXT:    uzp1 z7.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z2.h, z5.h, z5.h
+; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
+; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    splice z1.h, p0, { z6.h, z7.h }
 ; CHECK-NEXT:    add z0.h, z0.h, z0.h
 ; CHECK-NEXT:    add z1.h, z1.h, z1.h
 ; CHECK-NEXT:    stp q1, q0, [x1]
@@ -2906,56 +2915,56 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #160]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    ldp q16, q17, [x0, #128]
-; CHECK-NEXT:    ldp q0, q1, [x0, #64]
 ; CHECK-NEXT:    ldp q4, q5, [x0, #96]
-; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT:    ldp q6, q7, [x0]
-; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT:    ldp q18, q19, [x0, #192]
-; CHECK-NEXT:    uzp1 z17.s, z17.s, z17.s
-; CHECK-NEXT:    ldp q20, q21, [x0, #224]
-; CHECK-NEXT:    uzp1 z16.s, z16.s, z16.s
-; CHECK-NEXT:    ldp q22, q23, [x0, #32]
+; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
+; CHECK-NEXT:    ldp q3, q18, [x0, #128]
+; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
+; CHECK-NEXT:    ldp q2, q19, [x0, #192]
+; CHECK-NEXT:    ldp q0, q1, [x0, #64]
+; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
+; CHECK-NEXT:    ldp q18, q22, [x0, #224]
+; CHECK-NEXT:    uzp1 z20.s, z3.s, z3.s
+; CHECK-NEXT:    ldp q3, q23, [x0, #32]
+; CHECK-NEXT:    splice z16.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z27.s, z19.s, z19.s
+; CHECK-NEXT:    uzp1 z25.s, z22.s, z22.s
+; CHECK-NEXT:    uzp1 z26.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z24.s, z18.s, z18.s
+; CHECK-NEXT:    uzp1 z18.s, z23.s, z23.s
+; CHECK-NEXT:    uzp1 z23.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z22.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
+; CHECK-NEXT:    uzp1 z5.s, z1.s, z1.s
+; CHECK-NEXT:    splice z1.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    splice z6.s, p0, { z24.s, z25.s }
+; CHECK-NEXT:    uzp1 z4.s, z0.s, z0.s
+; CHECK-NEXT:    splice z0.s, p0, { z26.s, z27.s }
+; CHECK-NEXT:    splice z7.s, p0, { z17.s, z18.s }
+; CHECK-NEXT:    uzp1 z17.h, z16.h, z16.h
 ; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    uzp1 z19.s, z19.s, z19.s
-; CHECK-NEXT:    uzp1 z18.s, z18.s, z18.s
-; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z21.s, z21.s, z21.s
-; CHECK-NEXT:    uzp1 z20.s, z20.s, z20.s
-; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z23.s, z23.s, z23.s
-; CHECK-NEXT:    uzp1 z22.s, z22.s, z22.s
-; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT:    splice z3.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    splice z16.s, p0, { z20.s, z21.s }
-; CHECK-NEXT:    splice z17.s, p0, { z18.s, z19.s }
-; CHECK-NEXT:    splice z18.s, p0, { z22.s, z23.s }
-; CHECK-NEXT:    splice z6.s, p0, { z6.s, z7.s }
-; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT:    splice z3.s, p0, { z22.s, z23.s }
 ; CHECK-NEXT:    splice z4.s, p0, { z4.s, z5.s }
-; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
 ; CHECK-NEXT:    ptrue p0.h, vl4
-; CHECK-NEXT:    uzp1 z20.h, z16.h, z16.h
-; CHECK-NEXT:    uzp1 z19.h, z17.h, z17.h
-; CHECK-NEXT:    uzp1 z7.h, z18.h, z18.h
 ; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
-; CHECK-NEXT:    splice z1.h, p0, { z1.h, z2.h }
-; CHECK-NEXT:    uzp1 z2.h, z0.h, z0.h
-; CHECK-NEXT:    splice z0.h, p0, { z19.h, z20.h }
-; CHECK-NEXT:    splice z4.h, p0, { z6.h, z7.h }
-; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
-; CHECK-NEXT:    add z1.h, z1.h, z1.h
-; CHECK-NEXT:    add z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z1.h, z7.h, z7.h
+; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
+; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:    splice z7.h, p0, { z16.h, z17.h }
+; CHECK-NEXT:    uzp1 z2.h, z4.h, z4.h
+; CHECK-NEXT:    splice z4.h, p0, { z5.h, z6.h }
+; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
+; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
+; CHECK-NEXT:    add z2.h, z7.h, z7.h
 ; CHECK-NEXT:    add z3.h, z4.h, z4.h
-; CHECK-NEXT:    add z2.h, z2.h, z2.h
-; CHECK-NEXT:    stp q1, q0, [x1, #32]
-; CHECK-NEXT:    stp q3, q2, [x1]
+; CHECK-NEXT:    add z0.h, z0.h, z0.h
+; CHECK-NEXT:    add z1.h, z1.h, z1.h
+; CHECK-NEXT:    stp q2, q3, [x1, #32]
+; CHECK-NEXT:    stp q0, q1, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16:
@@ -3111,9 +3120,10 @@ define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q1, q0, [x0]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    uzp1 z2.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT:    splice z0.s, p0, { z1.s, z2.s }
+; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
+; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v4i64_v4i32:
@@ -3199,16 +3209,16 @@ define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind {
 ; CHECK-NEXT:    uzp1 z7.s, z0.s, z0.s
 ; CHECK-NEXT:    uzp1 z6.s, z1.s, z1.s
 ; CHECK-NEXT:    ldp q1, q0, [x0, #32]
-; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z17.s, z0.s, z0.s
-; CHECK-NEXT:    uzp1 z16.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z3.s, z5.s, z5.s
+; CHECK-NEXT:    uzp1 z2.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z5.s, z0.s, z0.s
 ; CHECK-NEXT:    splice z0.s, p0, { z6.s, z7.s }
-; CHECK-NEXT:    splice z1.s, p0, { z2.s, z3.s }
-; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
-; CHECK-NEXT:    splice z3.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z4.s, z1.s, z1.s
+; CHECK-NEXT:    splice z1.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    splice z3.s, p0, { z4.s, z5.s }
 ; CHECK-NEXT:    add z0.s, z0.s, z0.s
 ; CHECK-NEXT:    add z1.s, z1.s, z1.s
 ; CHECK-NEXT:    add z2.s, z2.s, z2.s
@@ -3287,49 +3297,49 @@ define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ldp q2, q3, [x0, #192]
 ; CHECK-NEXT:    ptrue p0.s, vl2
-; CHECK-NEXT:    ldp q16, q17, [x0, #224]
-; CHECK-NEXT:    ldp q0, q1, [x0, #32]
 ; CHECK-NEXT:    ldp q4, q5, [x0]
-; CHECK-NEXT:    uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT:    ldp q6, q7, [x0, #64]
-; CHECK-NEXT:    uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT:    ldp q18, q19, [x0, #128]
-; CHECK-NEXT:    uzp1 z17.s, z17.s, z17.s
-; CHECK-NEXT:    ldp q20, q21, [x0, #160]
-; CHECK-NEXT:    uzp1 z16.s, z16.s, z16.s
-; CHECK-NEXT:    ldp q22, q23, [x0, #96]
-; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
+; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
+; CHECK-NEXT:    ldp q3, q18, [x0, #224]
+; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
+; CHECK-NEXT:    ldp q2, q19, [x0, #128]
+; CHECK-NEXT:    ldp q0, q1, [x0, #32]
+; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
+; CHECK-NEXT:    ldp q18, q22, [x0, #160]
+; CHECK-NEXT:    uzp1 z20.s, z3.s, z3.s
+; CHECK-NEXT:    uzp1 z24.s, z19.s, z19.s
+; CHECK-NEXT:    ldp q3, q19, [x0, #96]
+; CHECK-NEXT:    uzp1 z23.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z26.s, z22.s, z22.s
+; CHECK-NEXT:    splice z2.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    uzp1 z17.s, z7.s, z7.s
+; CHECK-NEXT:    uzp1 z25.s, z18.s, z18.s
+; CHECK-NEXT:    splice z7.s, p0, { z20.s, z21.s }
+; CHECK-NEXT:    uzp1 z21.s, z5.s, z5.s
 ; CHECK-NEXT:    uzp1 z19.s, z19.s, z19.s
-; CHECK-NEXT:    uzp1 z18.s, z18.s, z18.s
-; CHECK-NEXT:    uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT:    uzp1 z21.s, z21.s, z21.s
-; CHECK-NEXT:    uzp1 z20.s, z20.s, z20.s
-; CHECK-NEXT:    splice z3.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT:    uzp1 z17.s, z23.s, z23.s
-; CHECK-NEXT:    uzp1 z16.s, z22.s, z22.s
-; CHECK-NEXT:    uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT:    uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT:    uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT:    splice z18.s, p0, { z18.s, z19.s }
-; CHECK-NEXT:    splice z19.s, p0, { z20.s, z21.s }
-; CHECK-NEXT:    splice z6.s, p0, { z6.s, z7.s }
-; CHECK-NEXT:    splice z7.s, p0, { z16.s, z17.s }
-; CHECK-NEXT:    add z2.s, z2.s, z2.s
+; CHECK-NEXT:    uzp1 z20.s, z4.s, z4.s
+; CHECK-NEXT:    uzp1 z5.s, z1.s, z1.s
+; CHECK-NEXT:    uzp1 z16.s, z6.s, z6.s
+; CHECK-NEXT:    splice z6.s, p0, { z23.s, z24.s }
+; CHECK-NEXT:    uzp1 z18.s, z3.s, z3.s
+; CHECK-NEXT:    splice z3.s, p0, { z25.s, z26.s }
+; CHECK-NEXT:    uzp1 z4.s, z0.s, z0.s
+; CHECK-NEXT:    add z0.s, z2.s, z2.s
+; CHECK-NEXT:    add z7.s, z7.s, z7.s
+; CHECK-NEXT:    splice z1.s, p0, { z16.s, z17.s }
+; CHECK-NEXT:    splice z2.s, p0, { z18.s, z19.s }
+; CHECK-NEXT:    splice z16.s, p0, { z20.s, z21.s }
 ; CHECK-NEXT:    splice z4.s, p0, { z4.s, z5.s }
+; CHECK-NEXT:    add z6.s, z6.s, z6.s
 ; CHECK-NEXT:    add z3.s, z3.s, z3.s
-; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
-; CHECK-NEXT:    add z5.s, z18.s, z18.s
-; CHECK-NEXT:    add z1.s, z19.s, z19.s
-; CHECK-NEXT:    stp q2, q3, [x1, #96]
-; CHECK-NEXT:    add z2.s, z6.s, z6.s
-; CHECK-NEXT:    add z3.s, z7.s, z7.s
-; CHECK-NEXT:    add z4.s, z4.s, z4.s
-; CHECK-NEXT:    add z0.s, z0.s, z0.s
-; CHECK-NEXT:    stp q5, q1, [x1, #64]
-; CHECK-NEXT:    stp q2, q3, [x1, #32]
-; CHECK-NEXT:    stp q4, q0, [x1]
+; CHECK-NEXT:    stp q0, q7, [x1, #96]
+; CHECK-NEXT:    add z0.s, z1.s, z1.s
+; CHECK-NEXT:    add z1.s, z2.s, z2.s
+; CHECK-NEXT:    add z2.s, z16.s, z16.s
+; CHECK-NEXT:    stp q6, q3, [x1, #64]
+; CHECK-NEXT:    add z3.s, z4.s, z4.s
+; CHECK-NEXT:    stp q0, q1, [x1, #32]
+; CHECK-NEXT:    stp q2, q3, [x1]
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
index d1099b49a45153..ea6123edc8b4c4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
@@ -10,8 +10,10 @@ define <4 x i8> @shuffle_ext_byone_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4i8:
@@ -35,6 +37,8 @@ define <4 x i8> @shuffle_ext_byone_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 define <8 x i8> @shuffle_ext_byone_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v8i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z0.b, z0.b[7]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.b, w8
@@ -64,6 +68,8 @@ define <8 x i8> @shuffle_ext_byone_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 define <16 x i8> @shuffle_ext_byone_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v16i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.b, z0.b[15]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.b, w8
@@ -152,7 +158,9 @@ define <2 x i16> @shuffle_ext_byone_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    revw z0.d, p0/m, z0.d
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2i16:
@@ -171,6 +179,8 @@ define <2 x i16> @shuffle_ext_byone_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 define <4 x i16> @shuffle_ext_byone_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z0.h, z0.h[3]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.h, w8
@@ -198,6 +208,8 @@ define <4 x i16> @shuffle_ext_byone_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 define <8 x i16> @shuffle_ext_byone_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v8i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.h, z0.h[7]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.h, w8
@@ -276,6 +288,8 @@ define void @shuffle_ext_byone_v16i16(ptr %a, ptr %b) {
 define <2 x i32> @shuffle_ext_byone_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT:    mov z0.s, z0.s[1]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.s, w8
@@ -299,6 +313,8 @@ define <2 x i32> @shuffle_ext_byone_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 define <4 x i32> @shuffle_ext_byone_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.s, z0.s[3]
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    insr z1.s, w8
@@ -369,6 +385,8 @@ define void @shuffle_ext_byone_v8i32(ptr %a, ptr %b) {
 define <2 x i64> @shuffle_ext_byone_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    mov z0.d, z0.d[1]
 ; CHECK-NEXT:    fmov x8, d0
 ; CHECK-NEXT:    insr z1.d, x8
@@ -430,9 +448,11 @@ define void @shuffle_ext_byone_v4i64(ptr %a, ptr %b) {
 define <4 x half> @shuffle_ext_byone_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.h, z0.h[3]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    insr z0.h, h2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f16:
@@ -456,9 +476,11 @@ define <4 x half> @shuffle_ext_byone_v4f16(<4 x half> %op1, <4 x half> %op2) {
 define <8 x half> @shuffle_ext_byone_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z2.h, z0.h[7]
 ; CHECK-NEXT:    mov z0.d, z1.d
 ; CHECK-NEXT:    insr z0.h, h2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v8f16:
@@ -531,9 +553,11 @@ define void @shuffle_ext_byone_v16f16(ptr %a, ptr %b) {
 define <2 x float> @shuffle_ext_byone_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z2.s, z0.s[1]
 ; CHECK-NEXT:    fmov d0, d1
 ; CHECK-NEXT:    insr z0.s, s2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f32:
@@ -553,9 +577,11 @@ define <2 x float> @shuffle_ext_byone_v2f32(<2 x float> %op1, <2 x float> %op2)
 define <4 x float> @shuffle_ext_byone_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z2.s, z0.s[3]
 ; CHECK-NEXT:    mov z0.d, z1.d
 ; CHECK-NEXT:    insr z0.s, s2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v4f32:
@@ -620,9 +646,11 @@ define void @shuffle_ext_byone_v8f32(ptr %a, ptr %b) {
 define <2 x double> @shuffle_ext_byone_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    mov z2.d, z0.d[1]
 ; CHECK-NEXT:    mov z0.d, z1.d
 ; CHECK-NEXT:    insr z0.d, d2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: shuffle_ext_byone_v2f64:

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
index 37435e35ceabfe..9c7a3d5046d0e8 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-test-register-mov.ll
@@ -39,6 +39,7 @@ define <2 x i64> @fixed_vec_zero_constant() {
 ; CHECK-LABEL: fixed_vec_zero_constant:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fixed_vec_zero_constant:
@@ -53,6 +54,7 @@ define <2 x double> @fixed_vec_fp_zero_constant() {
 ; CHECK-LABEL: fixed_vec_fp_zero_constant:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
 ;
 ; NONEON-NOSVE-LABEL: fixed_vec_fp_zero_constant:

diff  --git a/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll b/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
index d9e8771b5e70af..91f8f5c2c90d84 100644
--- a/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vecreduce-dot.ll
@@ -43,6 +43,7 @@ define i32 @test(<vscale x 32 x i8> %bin.rdx, <vscale x 32 x i8> %bin.rdx2)  {
 ; CHECK-NEXT:    add z0.s, z1.s, z0.s
 ; CHECK-NEXT:    uaddv d0, p0, z0.s
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %a = sext <vscale x 32 x i8> %bin.rdx to <vscale x 32 x i32>
   %b = sext <vscale x 32 x i8> %bin.rdx2 to <vscale x 32 x i32>

diff  --git a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
index 0e7cff4835e720..84c15e4fbc33c7 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-compress.ll
@@ -160,11 +160,13 @@ define <4 x i32> @test_compress_v4i32_with_sve(<4 x i32> %vec, <4 x i1> %mask) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
 ; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT:    shl v1.4s, v1.4s, #31
 ; CHECK-NEXT:    cmlt v1.4s, v1.4s, #0
 ; CHECK-NEXT:    and z1.s, z1.s, #0x1
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z1.s, #0
 ; CHECK-NEXT:    compact z0.s, p0, z0.s
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
     %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> undef)
     ret <4 x i32> %out
@@ -195,6 +197,8 @@ define <4 x double> @test_compress_v4f64_with_sve(<4 x double> %vec, <4 x i1> %m
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    ushll v2.4s, v2.4h, #0
 ; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT:    ushll v3.2d, v2.2s, #0
 ; CHECK-NEXT:    ushll2 v4.2d, v2.4s, #0
 ; CHECK-NEXT:    fmov x8, d2

diff  --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
index 55e4d56f2cad40..8dd433b6f23c68 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -104,6 +104,7 @@ define <vscale x 2 x i64> @sve_splat_2xi64_dupm_imm() {
 define <vscale x 2 x i8> @sve_splat_2xi8(i8 %val) {
 ; CHECK-LABEL: sve_splat_2xi8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    mov z0.d, x0
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 2 x i8> undef, i8 %val, i32 0
@@ -142,6 +143,7 @@ define <vscale x 8 x i8> @sve_splat_8xi8_imm() {
 define <vscale x 2 x i16> @sve_splat_2xi16(i16 %val) {
 ; CHECK-LABEL: sve_splat_2xi16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    mov z0.d, x0
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 2 x i16> undef, i16 %val, i32 0
@@ -170,6 +172,7 @@ define <vscale x 4 x i16> @sve_splat_4xi16_imm() {
 define <vscale x 2 x i32> @sve_splat_2xi32(i32 %val) {
 ; CHECK-LABEL: sve_splat_2xi32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    mov z0.d, x0
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 2 x i32> undef, i32 %val, i32 0
@@ -213,6 +216,7 @@ define <vscale x 12 x i32> @sve_splat_12xi32(i32 %val) {
 define <vscale x 2 x i1> @sve_splat_2xi1(i1 %val) {
 ; CHECK-LABEL: sve_splat_2xi1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.d, xzr, x8
 ; CHECK-NEXT:    ret
@@ -224,6 +228,7 @@ define <vscale x 2 x i1> @sve_splat_2xi1(i1 %val) {
 define <vscale x 4 x i1> @sve_splat_4xi1(i1 %val) {
 ; CHECK-LABEL: sve_splat_4xi1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.s, xzr, x8
 ; CHECK-NEXT:    ret
@@ -235,6 +240,7 @@ define <vscale x 4 x i1> @sve_splat_4xi1(i1 %val) {
 define <vscale x 8 x i1> @sve_splat_8xi1(i1 %val) {
 ; CHECK-LABEL: sve_splat_8xi1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.h, xzr, x8
 ; CHECK-NEXT:    ret
@@ -246,6 +252,7 @@ define <vscale x 8 x i1> @sve_splat_8xi1(i1 %val) {
 define <vscale x 16 x i1> @sve_splat_16xi1(i1 %val) {
 ; CHECK-LABEL: sve_splat_16xi1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    sbfx x8, x0, #0, #1
 ; CHECK-NEXT:    whilelo p0.b, xzr, x8
 ; CHECK-NEXT:    ret
@@ -259,6 +266,7 @@ define <vscale x 16 x i1> @sve_splat_16xi1(i1 %val) {
 define <vscale x 8 x bfloat> @splat_nxv8bf16(bfloat %val) #0 {
 ; CHECK-LABEL: splat_nxv8bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 8 x bfloat> undef, bfloat %val, i32 0
@@ -269,6 +277,7 @@ define <vscale x 8 x bfloat> @splat_nxv8bf16(bfloat %val) #0 {
 define <vscale x 4 x bfloat> @splat_nxv4bf16(bfloat %val) #0 {
 ; CHECK-LABEL: splat_nxv4bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 4 x bfloat> undef, bfloat %val, i32 0
@@ -279,6 +288,7 @@ define <vscale x 4 x bfloat> @splat_nxv4bf16(bfloat %val) #0 {
 define <vscale x 2 x bfloat> @splat_nxv2bf16(bfloat %val) #0 {
 ; CHECK-LABEL: splat_nxv2bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x bfloat> undef, bfloat %val, i32 0
@@ -289,6 +299,7 @@ define <vscale x 2 x bfloat> @splat_nxv2bf16(bfloat %val) #0 {
 define <vscale x 8 x half> @splat_nxv8f16(half %val) {
 ; CHECK-LABEL: splat_nxv8f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 8 x half> undef, half %val, i32 0
@@ -299,6 +310,7 @@ define <vscale x 8 x half> @splat_nxv8f16(half %val) {
 define <vscale x 4 x half> @splat_nxv4f16(half %val) {
 ; CHECK-LABEL: splat_nxv4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 4 x half> undef, half %val, i32 0
@@ -309,6 +321,7 @@ define <vscale x 4 x half> @splat_nxv4f16(half %val) {
 define <vscale x 2 x half> @splat_nxv2f16(half %val) {
 ; CHECK-LABEL: splat_nxv2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
 ; CHECK-NEXT:    mov z0.h, h0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x half> undef, half %val, i32 0
@@ -319,6 +332,7 @@ define <vscale x 2 x half> @splat_nxv2f16(half %val) {
 define <vscale x 4 x float> @splat_nxv4f32(float %val) {
 ; CHECK-LABEL: splat_nxv4f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 4 x float> undef, float %val, i32 0
@@ -329,6 +343,7 @@ define <vscale x 4 x float> @splat_nxv4f32(float %val) {
 define <vscale x 2 x float> @splat_nxv2f32(float %val) {
 ; CHECK-LABEL: splat_nxv2f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
 ; CHECK-NEXT:    mov z0.s, s0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x float> undef, float %val, i32 0
@@ -339,6 +354,7 @@ define <vscale x 2 x float> @splat_nxv2f32(float %val) {
 define <vscale x 2 x double> @splat_nxv2f64(double %val) {
 ; CHECK-LABEL: splat_nxv2f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT:    mov z0.d, d0
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x double> undef, double %val, i32 0

diff  --git a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
index 0ec2bc60c5a8a8..dad357c8a0c132 100644
--- a/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vl-arith.ll
@@ -263,7 +263,9 @@ define i32 @incb_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: incb_scalar_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    addvl x0, x0, #3
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -282,7 +284,9 @@ define i32 @inch_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: inch_scalar_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    inch x0, all, mul #7
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -301,7 +305,9 @@ define i32 @incw_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: incw_scalar_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incw x0, all, mul #7
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -320,7 +326,9 @@ define i32 @incd_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: incd_scalar_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    incd x0, all, mul #7
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -341,7 +349,9 @@ define i32 @decb_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: decb_scalar_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    addvl x0, x0, #-4
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -360,7 +370,9 @@ define i32 @dech_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: dech_scalar_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    dech x0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -379,7 +391,9 @@ define i32 @decw_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: decw_scalar_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decw x0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 
   %vscale = call i64 @llvm.vscale.i64()
@@ -398,7 +412,9 @@ define i32 @decd_scalar_i32(i32 %a) {
 ;
 ; CHECK-LABEL: decd_scalar_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    decd x0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %vscale = call i64 @llvm.vscale.i64()
   %mul = mul i64 %vscale, 2

diff  --git a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
index ba3bb51b38b475..06cd65620d1c9e 100644
--- a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll
@@ -54,6 +54,7 @@ define void @histogram_i32_promote(ptr %base, <vscale x 2 x i64> %indices, <vsca
 ; CHECK-LABEL: histogram_i32_promote:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    histcnt z1.d, p0/z, z0.d, z0.d
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov z3.d, x1
 ; CHECK-NEXT:    ld1w { z2.d }, p0/z, [x0, z0.d, lsl #2]
 ; CHECK-NEXT:    ptrue p1.d
@@ -99,6 +100,7 @@ define void @histogram_i16_2_lane(ptr %base, <vscale x 2 x i64> %indices, <vscal
 ; CHECK-LABEL: histogram_i16_2_lane:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    histcnt z1.d, p0/z, z0.d, z0.d
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov z3.d, x1
 ; CHECK-NEXT:    ld1h { z2.d }, p0/z, [x0, z0.d, lsl #1]
 ; CHECK-NEXT:    ptrue p1.d
@@ -114,6 +116,7 @@ define void @histogram_i8_2_lane(ptr %base, <vscale x 2 x i64> %indices, <vscale
 ; CHECK-LABEL: histogram_i8_2_lane:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    histcnt z1.d, p0/z, z0.d, z0.d
+; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT:    mov z3.d, x1
 ; CHECK-NEXT:    ld1b { z2.d }, p0/z, [x0, z0.d]
 ; CHECK-NEXT:    ptrue p1.d

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
index f97585b78fb3e9..5cea7536e1f3ca 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-luti.ll
@@ -77,7 +77,7 @@ define <vscale x 8 x i16> @test_luti4_lane_i16_x2(<vscale x 8 x i16> %table, <vs
 ; CHECK-LABEL: test_luti4_lane_i16_x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    mov z3.d, z2.d
 ; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <vscale x 8 x i16> @llvm.aarch64.sve.luti4.lane.x2.nxv8i16(<vscale x 8 x i16> %table, <vscale x 8 x i16> %table, <vscale x 16 x i8> %indices, i32 0)
@@ -88,7 +88,7 @@ define <vscale x 8 x half> @test_luti4_lane_f16_x2(<vscale x 8 x half> %table, <
 ; CHECK-LABEL: test_luti4_lane_f16_x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    mov z3.d, z2.d
 ; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <vscale x 8 x half> @llvm.aarch64.sve.luti4.lane.x2.nxv8f16(<vscale x 8 x half> %table, <vscale x 8 x half> %table, <vscale x 16 x i8> %indices, i32 0)
@@ -99,7 +99,7 @@ define <vscale x 8 x bfloat> @test_luti4_lane_bf16_x2(<vscale x 8 x bfloat> %tab
 ; CHECK-LABEL: test_luti4_lane_bf16_x2:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z2.d, z0.d
-; CHECK-NEXT:    mov z3.d, z0.d
+; CHECK-NEXT:    mov z3.d, z2.d
 ; CHECK-NEXT:    luti4 z0.h, { z2.h, z3.h }, z1[0]
 ; CHECK-NEXT:    ret
    %res= tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.luti4.lane.x2.nxv8bf16(<vscale x 8 x bfloat> %table, <vscale x 8 x bfloat> %table, <vscale x 16 x i8> %indices, i32 0)

diff  --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
index b200eb3f23bf2a..9fd1eb616c28c9 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll
@@ -8,8 +8,9 @@
 define <vscale x 16 x i8> @tbl2_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: tbl2_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.d, z2.d
-; CHECK-NEXT:    tbl z0.b, { z0.b, z1.b }, z3.b
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.b, { z1.b, z2.b }, z3.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl2.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b,
@@ -20,8 +21,9 @@ define <vscale x 16 x i8> @tbl2_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %unu
 define <vscale x 8 x i16> @tbl2_h(<vscale x 8 x i16> %a, <vscale x 16 x i8> %unused, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: tbl2_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.d, z2.d
-; CHECK-NEXT:    tbl z0.h, { z0.h, z1.h }, z3.h
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.h, { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl2.nxv8i16(<vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b,
@@ -32,8 +34,9 @@ define <vscale x 8 x i16> @tbl2_h(<vscale x 8 x i16> %a, <vscale x 16 x i8> %unu
 define <vscale x 4 x i32> @tbl2_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: tbl2_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.d, z2.d
-; CHECK-NEXT:    tbl z0.s, { z0.s, z1.s }, z3.s
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.s, { z1.s, z2.s }, z3.s
 ; CHECK-NEXT:    ret
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl2.nxv4i32(<vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b,
@@ -44,8 +47,9 @@ define <vscale x 4 x i32> @tbl2_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %unu
 define <vscale x 2 x i64> @tbl2_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: tbl2_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.d, z2.d
-; CHECK-NEXT:    tbl z0.d, { z0.d, z1.d }, z3.d
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.d, { z1.d, z2.d }, z3.d
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl2.nxv2i64(<vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b,
@@ -56,8 +60,9 @@ define <vscale x 2 x i64> @tbl2_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %unu
 define <vscale x 8 x half> @tbl2_fh(<vscale x 8 x half> %a, <vscale x 8 x half> %unused, <vscale x 8 x half> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: tbl2_fh:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.d, z2.d
-; CHECK-NEXT:    tbl z0.h, { z0.h, z1.h }, z3.h
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.h, { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl2.nxv8f16(<vscale x 8 x half> %a,
                                                                  <vscale x 8 x half> %b,
@@ -68,8 +73,9 @@ define <vscale x 8 x half> @tbl2_fh(<vscale x 8 x half> %a, <vscale x 8 x half>
 define <vscale x 8 x bfloat> @tbl2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %unused, <vscale x 8 x bfloat> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: tbl2_bf16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.d, z2.d
-; CHECK-NEXT:    tbl z0.h, { z0.h, z1.h }, z3.h
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.h, { z1.h, z2.h }, z3.h
 ; CHECK-NEXT:    ret
   %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                     <vscale x 8 x bfloat> %b,
@@ -80,8 +86,9 @@ define <vscale x 8 x bfloat> @tbl2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x
 define <vscale x 4 x float> @tbl2_fs(<vscale x 4 x float> %a, <vscale x 4 x float> %unused, <vscale x 4 x float> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: tbl2_fs:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.d, z2.d
-; CHECK-NEXT:    tbl z0.s, { z0.s, z1.s }, z3.s
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.s, { z1.s, z2.s }, z3.s
 ; CHECK-NEXT:    ret
   %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl2.nxv4f32(<vscale x 4 x float> %a,
                                                                   <vscale x 4 x float> %b,
@@ -92,8 +99,9 @@ define <vscale x 4 x float> @tbl2_fs(<vscale x 4 x float> %a, <vscale x 4 x floa
 define <vscale x 2 x double> @tbl2_fd(<vscale x 2 x double> %a, <vscale x 2 x double> %unused, <vscale x 2 x double> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: tbl2_fd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z1.d, z2.d
-; CHECK-NEXT:    tbl z0.d, { z0.d, z1.d }, z3.d
+; CHECK-NEXT:    // kill: def $z2 killed $z2 def $z1_z2
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    tbl z0.d, { z1.d, z2.d }, z3.d
 ; CHECK-NEXT:    ret
   %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl2.nxv2f64(<vscale x 2 x double> %a,
                                                                    <vscale x 2 x double> %b,

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
index 47709757668b39..baadd08e392d05 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll
@@ -15,6 +15,8 @@ declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fclamp.nxv8bf16(<vscale x 8 x bf
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_bfclamp_single_x2_f16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d){
 ; CHECK-LABEL: test_bfclamp_single_x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    bfclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d)
@@ -24,6 +26,10 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_bfclamp_single_x2_
 define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @test_bfclamp_single_x4_f16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d, <vscale x 8 x bfloat> %e, <vscale x 8 x bfloat> %f){
 ; CHECK-LABEL: test_bfclamp_single_x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    bfclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, <vscale x 8 x bfloat> %d, <vscale x 8 x bfloat> %e, <vscale x 8 x bfloat> %f)

diff  --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
index 35ee8c8b77b6f1..0a5a0b7cfcc880 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll
@@ -33,6 +33,8 @@ define <vscale x 2 x double> @test_fclamp_f64(<vscale x 2 x double> %a, <vscale
 define { <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d) #1 {
 ; CHECK-LABEL: test_fclamp_single_x2_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d)
@@ -42,6 +44,8 @@ define { <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x2_f16(<
 define { <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d) #1 {
 ; CHECK-LABEL: test_fclamp_single_x2_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fclamp { z0.s, z1.s }, z2.s, z3.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d)
@@ -51,6 +55,8 @@ define { <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x2_f32
 define { <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d) #1 {
 ; CHECK-LABEL: test_fclamp_single_x2_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    fclamp { z0.d, z1.d }, z2.d, z3.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x2.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d)
@@ -61,6 +67,10 @@ define { <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x2_f
 define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @test_fclamp_single_x4_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f) #1 {
 ; CHECK-LABEL: test_fclamp_single_x4_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x half> %d, <vscale x 8 x half> %e, <vscale x 8 x half> %f)
@@ -70,6 +80,10 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @test_fclamp_single_x4_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f) #1 {
 ; CHECK-LABEL: test_fclamp_single_x4_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fclamp { z0.s - z3.s }, z4.s, z5.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d, <vscale x 4 x float> %e, <vscale x 4 x float> %f)
@@ -79,6 +93,10 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @test_fclamp_single_x4_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f) #1 {
 ; CHECK-LABEL: test_fclamp_single_x4_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    fclamp { z0.d - z3.d }, z4.d, z5.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x double> %d, <vscale x 2 x double> %e, <vscale x 2 x double> %f)

diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
index 57e1a1e100db0b..8fe0694808c8e9 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll
@@ -7,6 +7,8 @@
 define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -19,6 +21,8 @@ define void @st2q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
 define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -32,6 +36,8 @@ define void @st2q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -45,6 +51,8 @@ define void @st2q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -58,6 +66,8 @@ define void @st2q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -71,6 +81,8 @@ define void @st2q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -84,6 +96,8 @@ define void @st2q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -97,6 +111,8 @@ define void @st2q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st2q_ss_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -111,6 +127,8 @@ define void @st2q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2q_si_i8_off16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -16
@@ -124,6 +142,8 @@ define void @st2q_si_i8_off16(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st2q_si_i8_off14:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 14
@@ -137,6 +157,8 @@ define void @st2q_si_i8_off14(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 8 x i16>, ptr %base, i64 14
@@ -150,6 +172,8 @@ define void @st2q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 4 x i32>, ptr %base, i64 14
@@ -163,6 +187,8 @@ define void @st2q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 2 x i64>, ptr %base, i64 14
@@ -176,6 +202,8 @@ define void @st2q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 8 x half>, ptr %base, i64 14
@@ -189,6 +217,8 @@ define void @st2q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 4 x float>, ptr %base, i64 14
@@ -202,6 +232,8 @@ define void @st2q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep= getelementptr <vscale x 2 x double>, ptr %base, i64 14
@@ -215,6 +247,8 @@ define void @st2q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x i1> %pred, ptr %base) {
 ; CHECK-LABEL: st2q_si_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    st2q { z0.q, z1.q }, p0, [x0, #14, mul vl]
 ; CHECK-NEXT:    ret
   %gep = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 14
@@ -232,6 +266,9 @@ define void @st2q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -246,6 +283,9 @@ define void @st3q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
 define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2,  <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -260,6 +300,9 @@ define void @st3q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -274,6 +317,9 @@ define void @st3q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -288,6 +334,9 @@ define void @st3q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -302,6 +351,9 @@ define void @st3q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -316,6 +368,9 @@ define void @st3q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -330,6 +385,9 @@ define void @st3q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st3q_ss_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -344,6 +402,9 @@ define void @st3q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i8_off24:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -24
@@ -358,6 +419,9 @@ define void @st3q_si_i8_off24(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i8_off21:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 21
@@ -372,6 +436,9 @@ define void @st3q_si_i8_off21(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2,  <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 21
@@ -386,6 +453,9 @@ define void @st3q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x i32>, ptr %addr, i64 21
@@ -400,6 +470,9 @@ define void @st3q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,<vscale x 2 x i64> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 21
@@ -414,6 +487,9 @@ define void @st3q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1,<vscale
 define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 21
@@ -428,6 +504,9 @@ define void @st3q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 21
@@ -442,6 +521,9 @@ define void @st3q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 21
@@ -456,6 +538,9 @@ define void @st3q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st3q_si_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2
 ; CHECK-NEXT:    st3q { z0.q - z2.q }, p0, [x0, #21, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 21
@@ -473,6 +558,10 @@ define void @st3q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -488,6 +577,10 @@ define void @st4q_ss_i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale
 define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -503,6 +596,10 @@ define void @st4q_ss_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -518,6 +615,10 @@ define void @st4q_ss_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -533,6 +634,10 @@ define void @st4q_ss_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -548,6 +653,10 @@ define void @st4q_ss_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2, <vscale x 4 x float> %v3, <vscale x 4 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -563,6 +672,10 @@ define void @st4q_ss_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -578,6 +691,10 @@ define void @st4q_ss_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr, i64 %offset) {
 ; CHECK-LABEL: st4q_ss_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4]
 ; CHECK-NEXT:    ret
   %1 = getelementptr i128, ptr %addr, i64 %offset
@@ -593,6 +710,10 @@ define void @st4q_ss_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1,
 define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i8_off32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 -32
@@ -608,6 +729,10 @@ define void @st4q_si_i8_off32(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <vscale x 16 x i8> %v2,<vscale x 16 x i8> %v3, <vscale x 16 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i8_off28:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 16 x i8>, ptr %addr, i64 28
@@ -623,6 +748,10 @@ define void @st4q_si_i8_off28(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1, <v
 define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale x 8 x i16> %v2, <vscale x 8 x i16> %v3,  <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x i16>, ptr %addr, i64 28
@@ -638,6 +767,10 @@ define void @st4q_si_i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1, <vscale
 define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale x 4 x i32> %v2, <vscale x 4 x i32> %v3, <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base1 = getelementptr <vscale x 4 x i32>, ptr %addr, i64 28
@@ -653,6 +786,10 @@ define void @st4q_si_i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1, <vscale
 define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale x 2 x i64> %v2, <vscale x 2 x i64> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x i64>, ptr %addr, i64 28
@@ -668,6 +805,10 @@ define void @st4q_si_i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1, <vscale
 define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vscale x 8 x half> %v2, <vscale x 8 x half> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x half>, ptr %addr, i64 28
@@ -683,6 +824,10 @@ define void @st4q_si_f16(<vscale x 8 x half> %v0, <vscale x 8 x half> %v1, <vsca
 define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vscale x 4 x float> %v2,<vscale x 4 x float> %v3,  <vscale x 4 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 4 x float>, ptr %addr, i64 28
@@ -698,6 +843,10 @@ define void @st4q_si_f32(<vscale x 4 x float> %v0, <vscale x 4 x float> %v1, <vs
 define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_f64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 2 x double>, ptr %addr, i64 28
@@ -713,6 +862,10 @@ define void @st4q_si_f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <
 define void @st4q_si_bf16(<vscale x 8 x bfloat> %v0, <vscale x 8 x bfloat> %v1, <vscale x 8 x bfloat> %v2, <vscale x 8 x bfloat> %v3, <vscale x 8 x i1> %pred, ptr %addr) {
 ; CHECK-LABEL: st4q_si_bf16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    st4q { z0.q - z3.q }, p0, [x0, #28, mul vl]
 ; CHECK-NEXT:    ret
   %base = getelementptr <vscale x 8 x bfloat>, ptr %addr, i64 28

diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
index 15efb8cf1ad5a4..0475601f1907eb 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
@@ -42,6 +42,8 @@ define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) #1 {
 ; CHECK-LABEL: test_sclamp_single_x2_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.b, z1.b }, z2.b, z3.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
@@ -51,6 +53,8 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x2_i8(<vsc
 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) #1 {
 ; CHECK-LABEL: test_sclamp_single_x2_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x  8 x i16>, <vscale x  8 x i16> } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
@@ -60,6 +64,8 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x2_i16(<vs
 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) #1 {
 ; CHECK-LABEL: test_sclamp_single_x2_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.s, z1.s }, z2.s, z3.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
@@ -69,6 +75,8 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x2_i32(<vs
 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) #1 {
 ; CHECK-LABEL: test_sclamp_single_x2_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    sclamp { z0.d, z1.d }, z2.d, z3.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
@@ -78,6 +86,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x2_i64(<vs
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_sclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) #1 {
 ; CHECK-LABEL: test_sclamp_single_x4_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.b - z3.b }, z4.b, z5.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
@@ -87,6 +99,10 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_sclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) #1 {
 ; CHECK-LABEL: test_sclamp_single_x4_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
@@ -96,6 +112,10 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_sclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) #1 {
 ; CHECK-LABEL: test_sclamp_single_x4_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.s - z3.s }, z4.s, z5.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
@@ -105,6 +125,10 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_sclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) #1 {
 ; CHECK-LABEL: test_sclamp_single_x4_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    sclamp { z0.d - z3.d }, z4.d, z5.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)

diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
index 2164b3161da5c4..b19b5d871459a8 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll
@@ -8,18 +8,18 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z31.d, z4.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    ptrue p1.b
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    ld1b { z27.b }, p1/z, [x0]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    sel { z0.b - z3.b }, pn8, { z4.b - z7.b }, { z24.b - z27.b }
+; CHECK-NEXT:    ld1b { z27.b }, p1/z, [x0]
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    sel { z0.b - z3.b }, pn8, { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -33,18 +33,18 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z31.d, z4.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -58,18 +58,18 @@ define { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z31.d, z4.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -83,18 +83,18 @@ define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <v
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z31.d, z4.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    ptrue p1.h
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z4.h - z7.h }, { z24.h - z27.h }
+; CHECK-NEXT:    ld1h { z27.h }, p1/z, [x0]
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -108,18 +108,18 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z31.d, z4.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    ld1w { z27.s }, p1/z, [x0]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    ld1w { z27.s }, p1/z, [x0]
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -133,18 +133,18 @@ define { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vsca
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z31.d, z4.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    ptrue p1.s
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    ld1w { z27.s }, p1/z, [x0]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z4.s - z7.s }, { z24.s - z27.s }
+; CHECK-NEXT:    ld1w { z27.s }, p1/z, [x0]
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -158,18 +158,18 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z31.d, z4.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    ld1d { z27.d }, p1/z, [x0]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    ld1d { z27.d }, p1/z, [x0]
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -183,18 +183,18 @@ define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <v
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov z26.d, z7.d
+; CHECK-NEXT:    mov z31.d, z4.d
+; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    ptrue p1.d
 ; CHECK-NEXT:    mov z25.d, z6.d
-; CHECK-NEXT:    mov z7.d, z4.d
+; CHECK-NEXT:    mov z30.d, z3.d
 ; CHECK-NEXT:    mov z24.d, z5.d
-; CHECK-NEXT:    ld1d { z27.d }, p1/z, [x0]
-; CHECK-NEXT:    mov z6.d, z3.d
-; CHECK-NEXT:    mov z5.d, z2.d
-; CHECK-NEXT:    mov z4.d, z1.d
+; CHECK-NEXT:    mov z29.d, z2.d
 ; CHECK-NEXT:    mov p8.b, p0.b
-; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z4.d - z7.d }, { z24.d - z27.d }
+; CHECK-NEXT:    ld1d { z27.d }, p1/z, [x0]
+; CHECK-NEXT:    mov z28.d, z1.d
+; CHECK-NEXT:    sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d }
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload

diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
index e46a7da551546f..e7d9c3e6839e43 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll
@@ -10,9 +10,9 @@ define void @st1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vsc
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1b { z2.b, z3.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -28,9 +28,9 @@ define void @st1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -46,9 +46,9 @@ define void @st1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -64,9 +64,9 @@ define void @st1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -82,9 +82,9 @@ define void @st1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -100,9 +100,9 @@ define void @st1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -118,9 +118,9 @@ define void @st1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -136,9 +136,9 @@ define void @st1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    st1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -154,11 +154,11 @@ define void @st1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <vsc
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1b { z4.b - z7.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -174,11 +174,11 @@ define void @st1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -194,11 +194,11 @@ define void @st1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -214,11 +214,11 @@ define void @st1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <vs
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -234,11 +234,11 @@ define void @st1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -254,11 +254,11 @@ define void @st1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -274,11 +274,11 @@ define void @st1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -294,11 +294,11 @@ define void @st1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    st1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -316,9 +316,9 @@ define void @stnt1_x2_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1b { z2.b, z3.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -334,9 +334,9 @@ define void @stnt1_x2_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -352,9 +352,9 @@ define void @stnt1_x2_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -370,9 +370,9 @@ define void @stnt1_x2_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -388,9 +388,9 @@ define void @stnt1_x2_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -406,9 +406,9 @@ define void @stnt1_x2_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1h { z2.h, z3.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -424,9 +424,9 @@ define void @stnt1_x2_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1w { z2.s, z3.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -442,9 +442,9 @@ define void @stnt1_x2_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z3.d, z2.d
-; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z2.d, z1.d
 ; CHECK-NEXT:    stnt1d { z2.d, z3.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -460,11 +460,11 @@ define void @stnt1_x4_i8(<vscale x 16 x i8> %unused, <vscale x 16 x i8> %zn0, <v
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1b { z4.b - z7.b }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -480,11 +480,11 @@ define void @stnt1_x4_i16(<vscale x 16 x i8> %unused, <vscale x 8 x i16> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -500,11 +500,11 @@ define void @stnt1_x4_i32(<vscale x 16 x i8> %unused, <vscale x 4 x i32> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -520,11 +520,11 @@ define void @stnt1_x4_i64(<vscale x 16 x i8> %unused, <vscale x 2 x i64> %zn0, <
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -540,11 +540,11 @@ define void @stnt1_x4_f16(<vscale x 16 x i8> %unused, <vscale x 8 x half> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -560,11 +560,11 @@ define void @stnt1_x4_bf16(<vscale x 16 x i8> %unused, <vscale x 8 x bfloat> %zn
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1h { z4.h - z7.h }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -580,11 +580,11 @@ define void @stnt1_x4_f32(<vscale x 16 x i8> %unused, <vscale x 4 x float> %zn0,
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1w { z4.s - z7.s }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1
@@ -600,11 +600,11 @@ define void @stnt1_x4_f64(<vscale x 16 x i8> %unused, <vscale x 2 x double> %zn0
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-1
 ; CHECK-NEXT:    mov z7.d, z4.d
-; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    str p8, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    mov p8.b, p0.b
+; CHECK-NEXT:    mov z6.d, z3.d
 ; CHECK-NEXT:    mov z5.d, z2.d
 ; CHECK-NEXT:    mov z4.d, z1.d
-; CHECK-NEXT:    mov p8.b, p0.b
 ; CHECK-NEXT:    stnt1d { z4.d - z7.d }, pn8, [x0]
 ; CHECK-NEXT:    ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    addvl sp, sp, #1

diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
index 65ebd3f7cd0bd4..a39f6cfa3b4c56 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
@@ -42,6 +42,8 @@ define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x
 define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d) #1 {
 ; CHECK-LABEL: test_uclamp_single_x2_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.b, z1.b }, z2.b, z3.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d)
@@ -51,6 +53,8 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x2_i8(<vsc
 define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d) #1 {
 ; CHECK-LABEL: test_uclamp_single_x2_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.h, z1.h }, z2.h, z3.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x  8 x i16>, <vscale x  8 x i16> } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d)
@@ -60,6 +64,8 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x2_i16(<vs
 define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d) #1 {
 ; CHECK-LABEL: test_uclamp_single_x2_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.s, z1.s }, z2.s, z3.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d)
@@ -69,6 +75,8 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x2_i32(<vs
 define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d) #1 {
 ; CHECK-LABEL: test_uclamp_single_x2_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT:    uclamp { z0.d, z1.d }, z2.d, z3.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d)
@@ -78,6 +86,10 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x2_i64(<vs
 define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @test_uclamp_single_x4_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f) #1 {
 ; CHECK-LABEL: test_uclamp_single_x4_i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.b - z3.b }, z4.b, z5.b
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c, <vscale x 16 x i8> %d, <vscale x 16 x i8> %e, <vscale x 16 x i8> %f)
@@ -87,6 +99,10 @@ define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 1
 define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @test_uclamp_single_x4_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f) #1 {
 ; CHECK-LABEL: test_uclamp_single_x4_i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.h - z3.h }, z4.h, z5.h
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e, <vscale x 8 x i16> %f)
@@ -96,6 +112,10 @@ define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8
 define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @test_uclamp_single_x4_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f) #1 {
 ; CHECK-LABEL: test_uclamp_single_x4_i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.s - z3.s }, z4.s, z5.s
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c, <vscale x 4 x i32> %d, <vscale x 4 x i32> %e, <vscale x 4 x i32> %f)
@@ -105,6 +125,10 @@ define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4
 define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @test_uclamp_single_x4_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f) #1 {
 ; CHECK-LABEL: test_uclamp_single_x4_i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
+; CHECK-NEXT:    // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
 ; CHECK-NEXT:    uclamp { z0.d - z3.d }, z4.d, z5.d
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64> %d, <vscale x 2 x i64> %e, <vscale x 2 x i64> %f)

diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
index 53436794b5b3cc..198d3c92158466 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll
@@ -97,11 +97,11 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @uzp_x4_f64(<vscale x 4 x double> %unused, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) nounwind {
 ; CHECK-LABEL: uzp_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z7.d, z5.d
-; CHECK-NEXT:    mov z6.d, z4.d
-; CHECK-NEXT:    mov z5.d, z3.d
-; CHECK-NEXT:    mov z4.d, z2.d
-; CHECK-NEXT:    uzp { z0.d - z3.d }, { z4.d - z7.d }
+; CHECK-NEXT:    mov z27.d, z5.d
+; CHECK-NEXT:    mov z26.d, z4.d
+; CHECK-NEXT:    mov z25.d, z3.d
+; CHECK-NEXT:    mov z24.d, z2.d
+; CHECK-NEXT:    uzp { z0.d - z3.d }, { z24.d - z27.d }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.uzp.x4.nxv2f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
   ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res
@@ -204,11 +204,11 @@ define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2
 define { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @zipq_x4_f64(<vscale x 4 x double> %unused, <vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4) nounwind {
 ; CHECK-LABEL: zipq_x4_f64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z7.d, z5.d
-; CHECK-NEXT:    mov z6.d, z4.d
-; CHECK-NEXT:    mov z5.d, z3.d
-; CHECK-NEXT:    mov z4.d, z2.d
-; CHECK-NEXT:    uzp { z0.q - z3.q }, { z4.q - z7.q }
+; CHECK-NEXT:    mov z27.d, z5.d
+; CHECK-NEXT:    mov z26.d, z4.d
+; CHECK-NEXT:    mov z25.d, z3.d
+; CHECK-NEXT:    mov z24.d, z2.d
+; CHECK-NEXT:    uzp { z0.q - z3.q }, { z24.q - z27.q }
 ; CHECK-NEXT:    ret
   %res = call { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.uzpq.x4.nxv2f64(<vscale x 2 x double> %zn1, <vscale x 2 x double> %zn2, <vscale x 2 x double> %zn3, <vscale x 2 x double> %zn4)
   ret { <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double> } %res

diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
index 638849605a2cb8..ab70f57b488742 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll
@@ -7,6 +7,7 @@ define <vscale x 16 x i1> @whilege_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.b, p1.b }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -17,6 +18,7 @@ define <vscale x 8 x i1> @whilege_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.h, p1.h }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -27,6 +29,7 @@ define <vscale x 4 x i1> @whilege_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -37,6 +40,7 @@ define <vscale x 2 x i1> @whilege_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilege_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilege { p0.d, p1.d }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -50,6 +54,7 @@ define <vscale x 16 x i1> @whilegt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.b, p1.b }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -60,6 +65,7 @@ define <vscale x 8 x i1> @whilegt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.h, p1.h }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -70,6 +76,7 @@ define <vscale x 4 x i1> @whilegt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -80,6 +87,7 @@ define <vscale x 2 x i1> @whilegt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilegt_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilegt { p0.d, p1.d }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -93,6 +101,7 @@ define <vscale x 16 x i1> @whilehi_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.b, p1.b }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -103,6 +112,7 @@ define <vscale x 8 x i1> @whilehi_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.h, p1.h }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -113,6 +123,7 @@ define <vscale x 4 x i1> @whilehi_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -123,6 +134,7 @@ define <vscale x 2 x i1> @whilehi_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehi_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehi { p0.d, p1.d }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -136,6 +148,7 @@ define <vscale x 16 x i1> @whilehs_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.b, p1.b }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -146,6 +159,7 @@ define <vscale x 8 x i1> @whilehs_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.h, p1.h }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -156,6 +170,7 @@ define <vscale x 4 x i1> @whilehs_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -166,6 +181,7 @@ define <vscale x 2 x i1> @whilehs_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilehs_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilehs { p0.d, p1.d }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -179,6 +195,7 @@ define <vscale x 16 x i1> @whilele_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.b, p1.b }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -189,6 +206,7 @@ define <vscale x 8 x i1> @whilele_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.h, p1.h }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -199,6 +217,7 @@ define <vscale x 4 x i1> @whilele_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -209,6 +228,7 @@ define <vscale x 2 x i1> @whilele_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilele_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilele { p0.d, p1.d }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -222,6 +242,7 @@ define <vscale x 16 x i1> @whilelo_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.b, p1.b }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -232,6 +253,7 @@ define <vscale x 8 x i1> @whilelo_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.h, p1.h }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -242,6 +264,7 @@ define <vscale x 4 x i1> @whilelo_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -252,6 +275,7 @@ define <vscale x 2 x i1> @whilelo_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelo_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelo { p0.d, p1.d }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -265,6 +289,7 @@ define <vscale x 16 x i1> @whilels_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.b, p1.b }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -275,6 +300,7 @@ define <vscale x 8 x i1> @whilels_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.h, p1.h }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -285,6 +311,7 @@ define <vscale x 4 x i1> @whilels_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -295,6 +322,7 @@ define <vscale x 2 x i1> @whilels_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilels_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilels { p0.d, p1.d }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0
@@ -308,6 +336,7 @@ define <vscale x 16 x i1> @whilelt_x2_nxv16i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv16i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.b, p1.b }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 16 x i1>, <vscale x 16 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 16 x i1>, <vscale x 16 x i1>} %pp, 0
@@ -318,6 +347,7 @@ define <vscale x 8 x i1> @whilelt_x2_nxv8i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv8i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.h, p1.h }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 8 x i1>, <vscale x 8 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 8 x i1>, <vscale x 8 x i1>} %pp, 0
@@ -328,6 +358,7 @@ define <vscale x 4 x i1> @whilelt_x2_nxv4i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv4i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.s, p1.s }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 4 x i1>, <vscale x 4 x i1>} %pp, 0
@@ -338,6 +369,7 @@ define <vscale x 2 x i1> @whilelt_x2_nxv2i1(i64 %m, i64 %n) nounwind {
 ; CHECK-LABEL: whilelt_x2_nxv2i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    whilelt { p0.d, p1.d }, x0, x1
+; CHECK-NEXT:    // kill: def $p0 killed $p0 killed $p0_p1
 ; CHECK-NEXT:    ret
   %pp = call { <vscale x 2 x i1>, <vscale x 2 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n)
   %res = extractvalue {<vscale x 2 x i1>, <vscale x 2 x i1>} %pp, 0

diff --git a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
index 77415381709d18..d3abc27a53dadc 100644
--- a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
+++ b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll
@@ -9,7 +9,6 @@ define void @"func"(ptr swifterror %0) #0 {
 ; CHECK-NEXT:    b {{\.?}}LBB0_2
 ; CHECK-NEXT:  {{\.?}}LBB0_1:{{.*}}%thirtythree
 ; CHECK-NEXT:  {{.*}}=>This Inner Loop Header: Depth=1
-; CHECK-NEXT:  {{.*}}implicit-def: $x0
 ; CHECK-NEXT:    b {{\.?}}LBB0_1
 ; CHECK-NEXT:  {{\.?}}LBB0_2:{{.*}}%thirtyeight
 ; CHECK-NEXT:    b {{\.?}}LBB0_3

diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll
index dd5ce449bb1d2a..0ad99008655184 100644
--- a/llvm/test/CodeGen/AArch64/tbl-loops.ll
+++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll
@@ -203,17 +203,16 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    fcmgt v3.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    fcmgt v4.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    fcmlt v5.4s, v1.4s, #0.0
-; CHECK-NEXT:    bit v1.16b, v0.16b, v3.16b
-; CHECK-NEXT:    mov v3.16b, v4.16b
-; CHECK-NEXT:    bsl v3.16b, v0.16b, v2.16b
-; CHECK-NEXT:    fcmlt v2.4s, v2.4s, #0.0
-; CHECK-NEXT:    bic v1.16b, v1.16b, v5.16b
-; CHECK-NEXT:    bic v2.16b, v3.16b, v2.16b
-; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
+; CHECK-NEXT:    bsl v3.16b, v0.16b, v1.16b
+; CHECK-NEXT:    bsl v4.16b, v0.16b, v2.16b
+; CHECK-NEXT:    fcmlt v1.4s, v2.4s, #0.0
+; CHECK-NEXT:    bic v2.16b, v3.16b, v5.16b
+; CHECK-NEXT:    bic v1.16b, v4.16b, v1.16b
 ; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    xtn v1.4h, v1.4s
+; CHECK-NEXT:    fcvtzs v1.4s, v1.4s
 ; CHECK-NEXT:    xtn v2.4h, v2.4s
-; CHECK-NEXT:    trn1 v1.8b, v1.8b, v2.8b
+; CHECK-NEXT:    xtn v1.4h, v1.4s
+; CHECK-NEXT:    trn1 v1.8b, v2.8b, v1.8b
 ; CHECK-NEXT:    str d1, [x0], #8
 ; CHECK-NEXT:    b.ne .LBB1_9
 ; CHECK-NEXT:  // %bb.10: // %middle.block
@@ -353,22 +352,21 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    fcmgt v6.4s, v3.4s, v0.4s
 ; CHECK-NEXT:    fcmgt v7.4s, v4.4s, v0.4s
 ; CHECK-NEXT:    fcmlt v16.4s, v2.4s, #0.0
-; CHECK-NEXT:    bit v2.16b, v0.16b, v5.16b
-; CHECK-NEXT:    fcmlt v5.4s, v3.4s, #0.0
-; CHECK-NEXT:    bit v3.16b, v0.16b, v6.16b
-; CHECK-NEXT:    mov v6.16b, v7.16b
-; CHECK-NEXT:    bsl v6.16b, v0.16b, v4.16b
-; CHECK-NEXT:    fcmlt v4.4s, v4.4s, #0.0
-; CHECK-NEXT:    bic v2.16b, v2.16b, v16.16b
-; CHECK-NEXT:    bic v3.16b, v3.16b, v5.16b
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    bic v4.16b, v6.16b, v4.16b
+; CHECK-NEXT:    fcmlt v17.4s, v3.4s, #0.0
+; CHECK-NEXT:    bsl v5.16b, v0.16b, v2.16b
+; CHECK-NEXT:    bsl v6.16b, v0.16b, v3.16b
+; CHECK-NEXT:    bsl v7.16b, v0.16b, v4.16b
+; CHECK-NEXT:    fcmlt v2.4s, v4.4s, #0.0
+; CHECK-NEXT:    bic v3.16b, v5.16b, v16.16b
+; CHECK-NEXT:    bic v4.16b, v6.16b, v17.16b
+; CHECK-NEXT:    bic v2.16b, v7.16b, v2.16b
 ; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
 ; CHECK-NEXT:    fcvtzs v4.4s, v4.4s
-; CHECK-NEXT:    xtn v2.4h, v2.4s
-; CHECK-NEXT:    xtn v3.4h, v3.4s
-; CHECK-NEXT:    xtn v4.4h, v4.4s
-; CHECK-NEXT:    tbl v2.16b, { v2.16b, v3.16b, v4.16b }, v1.16b
+; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-NEXT:    xtn v5.4h, v3.4s
+; CHECK-NEXT:    xtn v6.4h, v4.4s
+; CHECK-NEXT:    xtn v7.4h, v2.4s
+; CHECK-NEXT:    tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b
 ; CHECK-NEXT:    st1 { v2.s }[2], [x13]
 ; CHECK-NEXT:    str d2, [x0], #12
 ; CHECK-NEXT:    b.ne .LBB2_4
@@ -607,27 +605,26 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n
 ; CHECK-NEXT:    fcmgt v16.4s, v4.4s, v0.4s
 ; CHECK-NEXT:    fcmgt v17.4s, v5.4s, v0.4s
 ; CHECK-NEXT:    fcmlt v18.4s, v2.4s, #0.0
-; CHECK-NEXT:    bit v2.16b, v0.16b, v6.16b
-; CHECK-NEXT:    fcmlt v6.4s, v3.4s, #0.0
-; CHECK-NEXT:    bit v3.16b, v0.16b, v7.16b
-; CHECK-NEXT:    fcmlt v7.4s, v4.4s, #0.0
-; CHECK-NEXT:    bit v4.16b, v0.16b, v16.16b
-; CHECK-NEXT:    mov v16.16b, v17.16b
-; CHECK-NEXT:    bsl v16.16b, v0.16b, v5.16b
-; CHECK-NEXT:    fcmlt v5.4s, v5.4s, #0.0
-; CHECK-NEXT:    bic v2.16b, v2.16b, v18.16b
-; CHECK-NEXT:    bic v3.16b, v3.16b, v6.16b
-; CHECK-NEXT:    bic v4.16b, v4.16b, v7.16b
-; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
-; CHECK-NEXT:    bic v5.16b, v16.16b, v5.16b
+; CHECK-NEXT:    fcmlt v19.4s, v3.4s, #0.0
+; CHECK-NEXT:    fcmlt v20.4s, v4.4s, #0.0
+; CHECK-NEXT:    bsl v6.16b, v0.16b, v2.16b
+; CHECK-NEXT:    bsl v7.16b, v0.16b, v3.16b
+; CHECK-NEXT:    bsl v16.16b, v0.16b, v4.16b
+; CHECK-NEXT:    bsl v17.16b, v0.16b, v5.16b
+; CHECK-NEXT:    fcmlt v2.4s, v5.4s, #0.0
+; CHECK-NEXT:    bic v3.16b, v6.16b, v18.16b
+; CHECK-NEXT:    bic v4.16b, v7.16b, v19.16b
+; CHECK-NEXT:    bic v5.16b, v16.16b, v20.16b
+; CHECK-NEXT:    bic v2.16b, v17.16b, v2.16b
 ; CHECK-NEXT:    fcvtzs v3.4s, v3.4s
 ; CHECK-NEXT:    fcvtzs v4.4s, v4.4s
 ; CHECK-NEXT:    fcvtzs v5.4s, v5.4s
-; CHECK-NEXT:    xtn v2.4h, v2.4s
-; CHECK-NEXT:    xtn v3.4h, v3.4s
-; CHECK-NEXT:    xtn v4.4h, v4.4s
-; CHECK-NEXT:    xtn v5.4h, v5.4s
-; CHECK-NEXT:    tbl v2.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b
+; CHECK-NEXT:    fcvtzs v2.4s, v2.4s
+; CHECK-NEXT:    xtn v16.4h, v3.4s
+; CHECK-NEXT:    xtn v17.4h, v4.4s
+; CHECK-NEXT:    xtn v18.4h, v5.4s
+; CHECK-NEXT:    xtn v19.4h, v2.4s
+; CHECK-NEXT:    tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
 ; CHECK-NEXT:    str q2, [x0], #16
 ; CHECK-NEXT:    b.ne .LBB3_9
 ; CHECK-NEXT:  // %bb.10: // %middle.block

diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
index 0b09a3d9896ea0..184e8fff154b95 100644
--- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll
@@ -74,8 +74,8 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
 ; CHECK-BE-NEXT:    add x10, x9, #16
 ; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
 ; CHECK-BE-NEXT:    add x11, x9, #32
-; CHECK-BE-NEXT:    add x9, x9, #48
 ; CHECK-BE-NEXT:    ld1 { v2.16b }, [x10]
+; CHECK-BE-NEXT:    add x9, x9, #48
 ; CHECK-BE-NEXT:    ld1 { v3.16b }, [x11]
 ; CHECK-BE-NEXT:    ld1 { v4.16b }, [x9]
 ; CHECK-BE-NEXT:    add x9, x1, x8, lsl #4
@@ -363,21 +363,21 @@ define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
 ; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-BE-NEXT:    add x9, x0, x8, lsl #7
 ; CHECK-BE-NEXT:    add x13, x9, #64
-; CHECK-BE-NEXT:    add x10, x9, #112
-; CHECK-BE-NEXT:    add x11, x9, #96
 ; CHECK-BE-NEXT:    add x12, x9, #80
 ; CHECK-BE-NEXT:    add x14, x9, #16
 ; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
 ; CHECK-BE-NEXT:    ld1 { v16.16b }, [x13]
+; CHECK-BE-NEXT:    add x11, x9, #96
 ; CHECK-BE-NEXT:    add x13, x9, #32
-; CHECK-BE-NEXT:    add x9, x9, #48
 ; CHECK-BE-NEXT:    ld1 { v2.16b }, [x14]
 ; CHECK-BE-NEXT:    ld1 { v17.16b }, [x12]
+; CHECK-BE-NEXT:    add x10, x9, #112
+; CHECK-BE-NEXT:    add x9, x9, #48
 ; CHECK-BE-NEXT:    ld1 { v3.16b }, [x13]
 ; CHECK-BE-NEXT:    ld1 { v18.16b }, [x11]
 ; CHECK-BE-NEXT:    ld1 { v4.16b }, [x9]
-; CHECK-BE-NEXT:    ld1 { v19.16b }, [x10]
 ; CHECK-BE-NEXT:    add x9, x1, x8, lsl #4
+; CHECK-BE-NEXT:    ld1 { v19.16b }, [x10]
 ; CHECK-BE-NEXT:    add x8, x8, #1
 ; CHECK-BE-NEXT:    cmp x8, #1000
 ; CHECK-BE-NEXT:    tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
@@ -510,8 +510,8 @@ define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
 ; CHECK-BE-NEXT:    add x10, x9, #16
 ; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
 ; CHECK-BE-NEXT:    add x11, x9, #32
-; CHECK-BE-NEXT:    add x9, x9, #48
 ; CHECK-BE-NEXT:    ld1 { v2.16b }, [x10]
+; CHECK-BE-NEXT:    add x9, x9, #48
 ; CHECK-BE-NEXT:    ld1 { v3.16b }, [x11]
 ; CHECK-BE-NEXT:    ld1 { v4.16b }, [x9]
 ; CHECK-BE-NEXT:    add x9, x1, x8, lsl #3

diff --git a/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll b/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
index 59d3a5c97d65ea..f0856c43daf1d9 100644
--- a/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
+++ b/llvm/test/CodeGen/AArch64/uaddlv-vaddlp-combine.ll
@@ -6,6 +6,7 @@ define i32 @uaddlv_uaddlp_v8i16(<8 x i16> %0) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uaddlv s0, v0.8h
 ; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
   %2 = tail call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %0)
   %3 = tail call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %2)

diff --git a/llvm/test/CodeGen/AArch64/umul_fix_sat.ll b/llvm/test/CodeGen/AArch64/umul_fix_sat.ll
index 3eb70ff79d8c6f..e9965fd5319b5b 100644
--- a/llvm/test/CodeGen/AArch64/umul_fix_sat.ll
+++ b/llvm/test/CodeGen/AArch64/umul_fix_sat.ll
@@ -84,6 +84,8 @@ define i4 @func6(i4 %x, i4 %y) nounwind {
 define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-LABEL: vec:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov w8, v1.s[1]
 ; CHECK-NEXT:    mov w9, v0.s[1]
 ; CHECK-NEXT:    fmov w10, s0
@@ -96,6 +98,7 @@ define <2 x i32> @vec(<2 x i32> %x, <2 x i32> %y) nounwind {
 ; CHECK-NEXT:    csinv w9, w9, wzr, eq
 ; CHECK-NEXT:    fmov s0, w9
 ; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp = call <2 x i32> @llvm.umul.fix.sat.v2i32(<2 x i32> %x, <2 x i32> %y, i32 0)
   ret <2 x i32> %tmp

diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
index a07e35e6b3ad37..468a33ce5bfcf6 100644
--- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
@@ -282,6 +282,7 @@ define <2 x i64> @fold_urem_v2i64(<2 x i64> %x) {
 define <1 x i64> @fold_urem_v1i64(<1 x i64> %x) {
 ; CHECK-LABEL: fold_urem_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x9, d0
 ; CHECK-NEXT:    mov x8, #-3689348814741910324 // =0xcccccccccccccccc
 ; CHECK-NEXT:    mov w10, #10 // =0xa

diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index b095f3598276d7..557aa010b3a7d9 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -510,6 +510,7 @@ define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind {
 ; CHECK-NEXT:    ld1.b { v0 }[0], [x8]
 ; CHECK-NEXT:    orr x8, x8, #0x1
 ; CHECK-NEXT:    ld1.b { v0 }[4], [x8]
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:    .loh AdrpLdr Lloh36, Lloh37

diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
index f6cc7837d44a0b..854a2d75d4b864 100644
--- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll
+++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll
@@ -45,7 +45,10 @@ define <1 x float> @sin_v1f32(<1 x float> %x) nounwind {
 ; CHECK-LABEL: sin_v1f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $d0
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %r = call <1 x float> @llvm.sin.v1f32(<1 x float> %x)
@@ -56,16 +59,21 @@ define <2 x float> @sin_v2f32(<2 x float> %x) nounwind {
 ; CHECK-LABEL: sin_v2f32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    add sp, sp, #48
 ; CHECK-NEXT:    ret
   %r = call <2 x float> @llvm.sin.v2f32(<2 x float> %x)
@@ -80,16 +88,20 @@ define <3 x float> @sin_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -107,22 +119,27 @@ define <4 x float> @sin_v4f32(<4 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[3]
 ; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    bl sinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[3], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -265,16 +282,20 @@ define <3 x float> @cos_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl cosf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl cosf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -292,16 +313,20 @@ define <3 x float> @tan_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl tanf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl tanf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl tanf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -319,16 +344,20 @@ define <3 x float> @asin_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl asinf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl asinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl asinf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -346,16 +375,20 @@ define <3 x float> @acos_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl acosf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl acosf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl acosf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -373,16 +406,20 @@ define <3 x float> @atan_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl atanf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl atanf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl atanf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -401,10 +438,14 @@ define <3 x float> @atan2_v3f32(<3 x float> %x, <3 x float> %y) nounwind {
 ; CHECK-NEXT:    mov s1, v1.s[1]
 ; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl atan2f
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 killed $q1
 ; CHECK-NEXT:    bl atan2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
@@ -412,6 +453,7 @@ define <3 x float> @atan2_v3f32(<3 x float> %x, <3 x float> %y) nounwind {
 ; CHECK-NEXT:    mov s1, v1.s[2]
 ; CHECK-NEXT:    bl atan2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -429,16 +471,20 @@ define <3 x float> @sinh_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl sinhf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sinhf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl sinhf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -455,16 +501,20 @@ define <3 x float> @cosh_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl coshf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl coshf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl coshf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -482,16 +532,20 @@ define <3 x float> @tanh_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl tanhf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl tanhf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl tanhf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -509,16 +563,20 @@ define <3 x float> @exp_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl expf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl expf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -536,16 +594,20 @@ define <3 x float> @exp2_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl exp2f
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl exp2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -572,16 +634,20 @@ define <3 x float> @log_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl logf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl logf
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -599,16 +665,20 @@ define <3 x float> @log10_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl log10f
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl log10f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b
@@ -626,16 +696,20 @@ define <3 x float> @log2_v3f32(<3 x float> %x) nounwind {
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
 ; CHECK-NEXT:    bl log2f
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl log2f
 ; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
 ; CHECK-NEXT:    mov v1.s[2], v0.s[0]
 ; CHECK-NEXT:    mov v0.16b, v1.16b

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
index fd0f8fe306bfa6..4d847fb96cbb72 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add-legalization.ll
@@ -29,6 +29,7 @@ define i1 @test_v1i1(<1 x i1> %a) nounwind {
 define i8 @test_v1i8(<1 x i8> %a) nounwind {
 ; CHECK-LABEL: test_v1i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
   %b = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %a)
@@ -38,6 +39,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
 define i16 @test_v1i16(<1 x i16> %a) nounwind {
 ; CHECK-LABEL: test_v1i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
   %b = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a)
@@ -55,6 +57,7 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
 define i32 @test_v1i32(<1 x i32> %a) nounwind {
 ; CHECK-LABEL: test_v1i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %b = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a)
@@ -64,6 +67,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
 define i64 @test_v1i64(<1 x i64> %a) nounwind {
 ; CHECK-LABEL: test_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %b = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
index 1a7a81c321d5c4..8473f45f6c803b 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -1765,6 +1765,8 @@ entry:
 define i64 @add_pair_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-SD-LABEL: add_pair_v2i32_v2i64_zext:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uaddlv d0, v0.4s
 ; CHECK-SD-NEXT:    fmov x0, d0
@@ -1871,6 +1873,8 @@ entry:
 define i32 @add_pair_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %y) {
 ; CHECK-SD-LABEL: add_pair_v4i16_v4i32_zext:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uaddlv s0, v0.8h
 ; CHECK-SD-NEXT:    fmov w0, s0
@@ -2773,6 +2777,8 @@ entry:
 define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
 ; CHECK-SD-LABEL: add_pair_v2i16_v2i64_zext:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    movi v2.2d, #0x00ffff0000ffff
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
@@ -3067,6 +3073,8 @@ entry:
 define i32 @add_pair_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-SD-LABEL: add_pair_v4i8_v4i32_zext:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
 ; CHECK-SD-NEXT:    bic v1.4h, #255, lsl #8
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
@@ -3187,6 +3195,8 @@ entry:
 define zeroext i16 @add_pair_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %y) {
 ; CHECK-SD-LABEL: add_pair_v8i8_v8i16_zext:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uaddlv h0, v0.16b
 ; CHECK-SD-NEXT:    umov w0, v0.h[0]
@@ -3521,6 +3531,8 @@ entry:
 define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-SD-LABEL: add_pair_v2i8_v2i64_zext:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
 ; CHECK-SD-NEXT:    movi v2.2d, #0x0000ff000000ff
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
@@ -4549,6 +4561,8 @@ entry:
 define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
 ; CHECK-SD-BASE-LABEL: full:
 ; CHECK-SD-BASE:       // %bb.0: // %entry
+; CHECK-SD-BASE-NEXT:    // kill: def $w3 killed $w3 def $x3
+; CHECK-SD-BASE-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-BASE-NEXT:    sxtw x8, w3
 ; CHECK-SD-BASE-NEXT:    sxtw x9, w1
 ; CHECK-SD-BASE-NEXT:    ldr d0, [x0]
@@ -4603,6 +4617,8 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
 ; CHECK-SD-DOT:       // %bb.0: // %entry
 ; CHECK-SD-DOT-NEXT:    ldr d0, [x0]
 ; CHECK-SD-DOT-NEXT:    ldr d1, [x2]
+; CHECK-SD-DOT-NEXT:    // kill: def $w3 killed $w3 def $x3
+; CHECK-SD-DOT-NEXT:    // kill: def $w1 killed $w1 def $x1
 ; CHECK-SD-DOT-NEXT:    sxtw x8, w3
 ; CHECK-SD-DOT-NEXT:    sxtw x9, w1
 ; CHECK-SD-DOT-NEXT:    movi v2.2d, #0000000000000000
@@ -4655,6 +4671,8 @@ define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
 ;
 ; CHECK-GI-LABEL: full:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w3 killed $w3 def $x3
 ; CHECK-GI-NEXT:    sxtw x9, w1
 ; CHECK-GI-NEXT:    sxtw x8, w3
 ; CHECK-GI-NEXT:    ldr d0, [x0]

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
index 0c40e0534e846d..7fa416e0dbcd5c 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -29,6 +29,7 @@ define i1 @test_v1i1(<1 x i1> %a) nounwind {
 define i8 @test_v1i8(<1 x i8> %a) nounwind {
 ; CHECK-LABEL: test_v1i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
   %b = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
@@ -38,6 +39,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
 define i16 @test_v1i16(<1 x i16> %a) nounwind {
 ; CHECK-LABEL: test_v1i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
   %b = call i16 @llvm.vector.reduce.and.v1i16(<1 x i16> %a)
@@ -55,6 +57,7 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
 define i32 @test_v1i32(<1 x i32> %a) nounwind {
 ; CHECK-LABEL: test_v1i32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov w0, s0
 ; CHECK-NEXT:    ret
   %b = call i32 @llvm.vector.reduce.and.v1i32(<1 x i32> %a)
@@ -64,6 +67,7 @@ define i32 @test_v1i32(<1 x i32> %a) nounwind {
 define i64 @test_v1i64(<1 x i64> %a) nounwind {
 ; CHECK-LABEL: test_v1i64:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fmov x0, d0
 ; CHECK-NEXT:    ret
   %b = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %a)

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
index 6d830c71366eab..58020d28702b2f 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -18,6 +18,7 @@ declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
 define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-LABEL: reduce_and_v1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v0.b[0]
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    csel w0, w0, w1, lt
@@ -111,6 +112,7 @@ define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 define i32 @reduce_or_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
 ; CHECK-LABEL: reduce_or_v1:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    smov w8, v0.b[0]
 ; CHECK-NEXT:    cmp w8, #0
 ; CHECK-NEXT:    csel w0, w0, w1, lt

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
index 2fdbfbb9046963..215b6e086591dc 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
@@ -36,6 +36,7 @@ define half @test_v1f16_neutral(<1 x half> %a) nounwind {
 define float @test_v1f32(<1 x float> %a, float %s) nounwind {
 ; CHECK-LABEL: test_v1f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fadd s0, s1, s0
 ; CHECK-NEXT:    ret
   %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float %s, <1 x float> %a)
@@ -45,6 +46,8 @@ define float @test_v1f32(<1 x float> %a, float %s) nounwind {
 define float @test_v1f32_neutral(<1 x float> %a) nounwind {
 ; CHECK-LABEL: test_v1f32_neutral:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
   %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
   ret float %b

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
index 28803cdb323c4b..a2e5a8a1b4c46f 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
@@ -22,6 +22,8 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 define float @test_v1f32(<1 x float> %a) nounwind {
 ; CHECK-LABEL: test_v1f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
   %b = call reassoc float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
   ret float %b
@@ -58,10 +60,15 @@ define float @test_v3f32(<3 x float> %a) nounwind {
 define float @test_v5f32(<5 x float> %a) nounwind {
 ; CHECK-LABEL: test_v5f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    // kill: def $s1 killed $s1 def $q1
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
 ; CHECK-NEXT:    movi v5.4s, #128, lsl #24
-; CHECK-NEXT:    mov v0.s[2], v2.s[0]
+; CHECK-NEXT:    // kill: def $s4 killed $s4 def $q4
+; CHECK-NEXT:    // kill: def $s3 killed $s3 def $q3
+; CHECK-NEXT:    mov v0.s[1], v1.s[0]
 ; CHECK-NEXT:    mov v5.s[0], v4.s[0]
+; CHECK-NEXT:    mov v0.s[2], v2.s[0]
 ; CHECK-NEXT:    mov v0.s[3], v3.s[0]
 ; CHECK-NEXT:    fadd v0.4s, v0.4s, v5.4s
 ; CHECK-NEXT:    faddp v0.4s, v0.4s, v0.4s

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
index 12fc34f2f2b463..1164e02a16c9e0 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
@@ -1,14 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD-NOFP16
-; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD-FP16
-; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI-NOFP16
-; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI-FP16
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
 
 define float @add_HalfS(<2 x float> %bin.rdx)  {
-; CHECK-LABEL: add_HalfS:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    faddp s0, v0.2s
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: add_HalfS:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    faddp s0, v0.2s
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: add_HalfS:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    faddp s0, v0.2s
+; CHECK-GI-NEXT:    ret
   %r = call float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %bin.rdx)
   ret float %r
 }
@@ -16,6 +22,7 @@ define float @add_HalfS(<2 x float> %bin.rdx)  {
 define half @add_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-SD-NOFP16-LABEL: add_HalfH:
 ; CHECK-SD-NOFP16:       // %bb.0:
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -35,6 +42,7 @@ define half @add_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-SD-FP16-LABEL: add_HalfH:
 ; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    mov h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    faddp h2, v0.2h
 ; CHECK-SD-FP16-NEXT:    mov h0, v0.h[3]
@@ -45,6 +53,7 @@ define half @add_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-GI-NOFP16-LABEL: add_HalfH:
 ; CHECK-GI-NOFP16:       // %bb.0:
 ; CHECK-GI-NOFP16-NEXT:    mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
@@ -69,6 +78,7 @@ define half @add_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-FP16-LABEL: add_HalfH:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    fadd h1, h0, h1

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 37a3f123fc28de..03db1d0d433d3c 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -16,6 +16,7 @@ define float @add_HalfS(<2 x float> %bin.rdx)  {
 define half @add_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-SD-NOFP16-LABEL: add_HalfH:
 ; CHECK-SD-NOFP16:       // %bb.0:
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -368,6 +369,7 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
 ; CHECK-GI-NOFP16-NEXT:    fmov w9, s0
 ; CHECK-GI-NOFP16-NEXT:    b.ne .LBB10_1
 ; CHECK-GI-NOFP16-NEXT:  // %bb.2: // %exit
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop:
@@ -499,6 +501,7 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
 ; CHECK-GI-NOFP16-NEXT:    fmov w9, s0
 ; CHECK-GI-NOFP16-NEXT:    b.ne .LBB11_1
 ; CHECK-GI-NOFP16-NEXT:  // %bb.2: // %exit
+; CHECK-GI-NOFP16-NEXT:    // kill: def $h0 killed $h0 killed $s0
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop:

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
index 8d5b65958b7e70..1d295a30a994b0 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization-nan.ll
@@ -44,9 +44,27 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 }
 
 define float @test_v1f32(<1 x float> %a) nounwind {
-; CHECK-LABEL: test_v1f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
+; CHECK-NOFP-SD-LABEL: test_v1f32:
+; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NOFP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NOFP-SD-NEXT:    ret
+;
+; CHECK-FP-SD-LABEL: test_v1f32:
+; CHECK-FP-SD:       // %bb.0:
+; CHECK-FP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-FP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-FP-SD-NEXT:    ret
+;
+; CHECK-NOFP-GI-LABEL: test_v1f32:
+; CHECK-NOFP-GI:       // %bb.0:
+; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-NOFP-GI-NEXT:    ret
+;
+; CHECK-FP-GI-LABEL: test_v1f32:
+; CHECK-FP-GI:       // %bb.0:
+; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-FP-GI-NEXT:    ret
   %b = call float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -70,6 +88,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16:
 ; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -105,6 +124,7 @@ define half @test_v4f16(<4 x half> %a) nounwind {
 define half @test_v4f16_ninf(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16_ninf:
 ; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -428,9 +448,17 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16:
 ; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #254, lsl #8
-; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8
@@ -499,9 +527,17 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16_ninf:
 ; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #254, lsl #8
-; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
index 106ab214a056f6..ee2af110c84cd5 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -44,9 +44,27 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 }
 
 define float @test_v1f32(<1 x float> %a) nounwind {
-; CHECK-LABEL: test_v1f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
+; CHECK-NOFP-SD-LABEL: test_v1f32:
+; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NOFP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NOFP-SD-NEXT:    ret
+;
+; CHECK-FP-SD-LABEL: test_v1f32:
+; CHECK-FP-SD:       // %bb.0:
+; CHECK-FP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-FP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-FP-SD-NEXT:    ret
+;
+; CHECK-NOFP-GI-LABEL: test_v1f32:
+; CHECK-NOFP-GI:       // %bb.0:
+; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-NOFP-GI-NEXT:    ret
+;
+; CHECK-FP-GI-LABEL: test_v1f32:
+; CHECK-FP-GI:       // %bb.0:
+; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-FP-GI-NEXT:    ret
   %b = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -70,6 +88,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16:
 ; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -105,6 +124,7 @@ define half @test_v4f16(<4 x half> %a) nounwind {
 define half @test_v4f16_ninf(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16_ninf:
 ; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -428,9 +448,17 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16:
 ; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #252, lsl #8
-; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8
@@ -499,9 +527,17 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16_ninf:
 ; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    mvni v1.8h, #4, lsl #8
-; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
index a810f23d92a34b..be61f9b5217952 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
@@ -40,9 +40,27 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 }
 
 define float @test_v1f32(<1 x float> %a) nounwind {
-; CHECK-LABEL: test_v1f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
+; CHECK-NOFP-SD-LABEL: test_v1f32:
+; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NOFP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NOFP-SD-NEXT:    ret
+;
+; CHECK-FP-SD-LABEL: test_v1f32:
+; CHECK-FP-SD:       // %bb.0:
+; CHECK-FP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-FP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-FP-SD-NEXT:    ret
+;
+; CHECK-NOFP-GI-LABEL: test_v1f32:
+; CHECK-NOFP-GI:       // %bb.0:
+; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-NOFP-GI-NEXT:    ret
+;
+; CHECK-FP-GI-LABEL: test_v1f32:
+; CHECK-FP-GI:       // %bb.0:
+; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-FP-GI-NEXT:    ret
   %b = call float @llvm.vector.reduce.fmaximum.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -66,6 +84,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16:
 ; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -389,9 +408,17 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16:
 ; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #252, lsl #8
-; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
index f77e6285f978a8..300081dc3ec405 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -44,9 +44,27 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 }
 
 define float @test_v1f32(<1 x float> %a) nounwind {
-; CHECK-LABEL: test_v1f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
+; CHECK-NOFP-SD-LABEL: test_v1f32:
+; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NOFP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NOFP-SD-NEXT:    ret
+;
+; CHECK-FP-SD-LABEL: test_v1f32:
+; CHECK-FP-SD:       // %bb.0:
+; CHECK-FP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-FP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-FP-SD-NEXT:    ret
+;
+; CHECK-NOFP-GI-LABEL: test_v1f32:
+; CHECK-NOFP-GI:       // %bb.0:
+; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-NOFP-GI-NEXT:    ret
+;
+; CHECK-FP-GI-LABEL: test_v1f32:
+; CHECK-FP-GI:       // %bb.0:
+; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-FP-GI-NEXT:    ret
   %b = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -70,6 +88,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16:
 ; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -105,6 +124,7 @@ define half @test_v4f16(<4 x half> %a) nounwind {
 define half @test_v4f16_ninf(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16_ninf:
 ; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -428,9 +448,17 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16:
 ; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #124, lsl #8
-; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8
@@ -499,9 +527,17 @@ define half @test_v11f16_ninf(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16_ninf:
 ; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    mvni v1.8h, #132, lsl #8
-; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
index 675e584953d942..e735f670ced0cf 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
@@ -40,9 +40,27 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 }
 
 define float @test_v1f32(<1 x float> %a) nounwind {
-; CHECK-LABEL: test_v1f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
+; CHECK-NOFP-SD-LABEL: test_v1f32:
+; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NOFP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NOFP-SD-NEXT:    ret
+;
+; CHECK-FP-SD-LABEL: test_v1f32:
+; CHECK-FP-SD:       // %bb.0:
+; CHECK-FP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-FP-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-FP-SD-NEXT:    ret
+;
+; CHECK-NOFP-GI-LABEL: test_v1f32:
+; CHECK-NOFP-GI:       // %bb.0:
+; CHECK-NOFP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-NOFP-GI-NEXT:    ret
+;
+; CHECK-FP-GI-LABEL: test_v1f32:
+; CHECK-FP-GI:       // %bb.0:
+; CHECK-FP-GI-NEXT:    // kill: def $s0 killed $s0 killed $d0
+; CHECK-FP-GI-NEXT:    ret
   %b = call float @llvm.vector.reduce.fminimum.v1f32(<1 x float> %a)
   ret float %b
 }
@@ -66,6 +84,7 @@ define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
 define half @test_v4f16(<4 x half> %a) nounwind {
 ; CHECK-NOFP-SD-LABEL: test_v4f16:
 ; CHECK-NOFP-SD:       // %bb.0:
+; CHECK-NOFP-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NOFP-SD-NEXT:    mov h1, v0.h[1]
 ; CHECK-NOFP-SD-NEXT:    fcvt s2, h0
 ; CHECK-NOFP-SD-NEXT:    fcvt s1, h1
@@ -389,9 +408,17 @@ define half @test_v11f16(<11 x half> %a) nounwind {
 ;
 ; CHECK-FP-LABEL: test_v11f16:
 ; CHECK-FP:       // %bb.0:
+; CHECK-FP-NEXT:    // kill: def $h0 killed $h0 def $q0
+; CHECK-FP-NEXT:    // kill: def $h1 killed $h1 def $q1
+; CHECK-FP-NEXT:    // kill: def $h2 killed $h2 def $q2
+; CHECK-FP-NEXT:    // kill: def $h3 killed $h3 def $q3
+; CHECK-FP-NEXT:    // kill: def $h4 killed $h4 def $q4
+; CHECK-FP-NEXT:    // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT:    mov x8, sp
+; CHECK-FP-NEXT:    // kill: def $h6 killed $h6 def $q6
+; CHECK-FP-NEXT:    // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT:    mov v0.h[1], v1.h[0]
 ; CHECK-FP-NEXT:    movi v1.8h, #124, lsl #8
-; CHECK-FP-NEXT:    mov x8, sp
 ; CHECK-FP-NEXT:    mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT:    ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT:    add x8, sp, #8

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
index 4b738bef5fa4d5..ce7ae1e426bdac 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-legalization-strict.ll
@@ -23,6 +23,8 @@ define half @test_v1f16(<1 x half> %a) nounwind {
 define float @test_v1f32(<1 x float> %a) nounwind {
 ; CHECK-LABEL: test_v1f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    ret
   %b = call float @llvm.vector.reduce.fmul.f32.v1f32(float 1.0, <1 x float> %a)
   ret float %b

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
index 2636d5b2a34bb3..e1b21705c95f3e 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
@@ -7,11 +7,13 @@
 define float @mul_HalfS(<2 x float> %bin.rdx)  {
 ; CHECK-SD-LABEL: mul_HalfS:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmul s0, s0, v0.s[1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mul_HalfS:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NEXT:    ret
@@ -22,6 +24,7 @@ define float @mul_HalfS(<2 x float> %bin.rdx)  {
 define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-SD-NOFP16-LABEL: mul_HalfH:
 ; CHECK-SD-NOFP16:       // %bb.0:
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -41,6 +44,7 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-SD-FP16-LABEL: mul_HalfH:
 ; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    fmul h1, h0, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    fmul h0, h1, v0.h[3]
@@ -49,6 +53,7 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-GI-NOFP16-LABEL: mul_HalfH:
 ; CHECK-GI-NOFP16:       // %bb.0:
 ; CHECK-GI-NOFP16-NEXT:    mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-GI-NOFP16-NEXT:    fmov s1, w8
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
@@ -73,6 +78,7 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-FP16-LABEL: mul_HalfH:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    fmul h1, h0, h1

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
index 9047c1d09a6935..da75a805952120 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul.ll
@@ -7,11 +7,13 @@
 define float @mul_HalfS(<2 x float> %bin.rdx)  {
 ; CHECK-SD-LABEL: mul_HalfS:
 ; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NEXT:    fmul s0, s0, v0.s[1]
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: mul_HalfS:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    mov s1, v0.s[1]
 ; CHECK-GI-NEXT:    fmul s0, s0, s1
 ; CHECK-GI-NEXT:    ret
@@ -22,6 +24,7 @@ define float @mul_HalfS(<2 x float> %bin.rdx)  {
 define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ; CHECK-SD-NOFP16-LABEL: mul_HalfH:
 ; CHECK-SD-NOFP16:       // %bb.0:
+; CHECK-SD-NOFP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-NOFP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-SD-NOFP16-NEXT:    fcvt s2, h0
 ; CHECK-SD-NOFP16-NEXT:    fcvt s1, h1
@@ -41,6 +44,7 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-SD-FP16-LABEL: mul_HalfH:
 ; CHECK-SD-FP16:       // %bb.0:
+; CHECK-SD-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-SD-FP16-NEXT:    fmul h1, h0, v0.h[1]
 ; CHECK-SD-FP16-NEXT:    fmul h1, h1, v0.h[2]
 ; CHECK-SD-FP16-NEXT:    fmul h0, h1, v0.h[3]
@@ -58,6 +62,7 @@ define half @mul_HalfH(<4 x half> %bin.rdx)  {
 ;
 ; CHECK-GI-FP16-LABEL: mul_HalfH:
 ; CHECK-GI-FP16:       // %bb.0:
+; CHECK-GI-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
 ; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
 ; CHECK-GI-FP16-NEXT:    mov h3, v0.h[3]

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
index 9b652704fca704..809a6d6556a7be 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -31,6 +31,7 @@ define i1 @test_v1i1(<1 x i1> %a) nounwind {
 define i8 @test_v1i8(<1 x i8> %a) nounwind {
 ; CHECK-LABEL: test_v1i8:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.b[0]
 ; CHECK-NEXT:    ret
   %b = call i8 @llvm.vector.reduce.umax.v1i8(<1 x i8> %a)
@@ -40,6 +41,7 @@ define i8 @test_v1i8(<1 x i8> %a) nounwind {
 define i16 @test_v1i16(<1 x i16> %a) nounwind {
 ; CHECK-LABEL: test_v1i16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    ret
   %b = call i16 @llvm.vector.reduce.umax.v1i16(<1 x i16> %a)
@@ -55,19 +57,31 @@ define i24 @test_v1i24(<1 x i24> %a) nounwind {
 }
 
 define i32 @test_v1i32(<1 x i32> %a) nounwind {
-; CHECK-LABEL: test_v1i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov w0, s0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_v1i32:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    fmov w0, s0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_v1i32:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov w0, s0
+; CHECK-GI-NEXT:    ret
   %b = call i32 @llvm.vector.reduce.umax.v1i32(<1 x i32> %a)
   ret i32 %b
 }
 
 define i64 @test_v1i64(<1 x i64> %a) nounwind {
-; CHECK-LABEL: test_v1i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    fmov x0, d0
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: test_v1i64:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    fmov x0, d0
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: test_v1i64:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    fmov x0, d0
+; CHECK-GI-NEXT:    ret
   %b = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> %a)
   ret i64 %b
 }
@@ -195,6 +209,7 @@ define i1 @test_v4i1(<4 x i1> %a) nounwind {
 ;
 ; CHECK-GI-LABEL: test_v4i1:
 ; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]

diff --git a/llvm/test/CodeGen/AArch64/vector-compress.ll b/llvm/test/CodeGen/AArch64/vector-compress.ll
index 4f76c92039f1f1..710ea70d678c5b 100644
--- a/llvm/test/CodeGen/AArch64/vector-compress.ll
+++ b/llvm/test/CodeGen/AArch64/vector-compress.ll
@@ -206,6 +206,7 @@ define <8 x i32> @test_compress_large(<8 x i32> %vec, <8 x i1> %mask) {
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #32
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    ; kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT:    umov.b w9, v2[0]
 ; CHECK-NEXT:    umov.b w10, v2[1]
 ; CHECK-NEXT:    mov x12, sp
@@ -347,6 +348,7 @@ define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) {
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    shl.4h v1, v1, #15
 ; CHECK-NEXT:    add x8, sp, #8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str h0, [sp, #8]
 ; CHECK-NEXT:    cmlt.4h v1, v1, #0
 ; CHECK-NEXT:    umov.h w9, v1[0]
@@ -378,6 +380,7 @@ define <4 x i4> @test_compress_illegal_element_type(<4 x i4> %vec, <4 x i1> %mas
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    shl.4h v1, v1, #15
 ; CHECK-NEXT:    add x8, sp, #8
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str h0, [sp, #8]
 ; CHECK-NEXT:    cmlt.4h v1, v1, #0
 ; CHECK-NEXT:    umov.h w9, v1[0]

diff --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
index a4cb06a05968cf..063b23275c6167 100644
--- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
@@ -21,8 +21,9 @@ define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0
 define <1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 {
 ; CHECK-LABEL: test_copysign_v1f32_v1f64:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    fcvtn v1.2s, v1.2d
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    mvni.2s v2, #128, lsl #24
+; CHECK-NEXT:    fcvtn v1.2s, v1.2d
 ; CHECK-NEXT:    bif.8b v0, v1, v2
 ; CHECK-NEXT:    ret
   %tmp0 = fptrunc <1 x double> %b to <1 x float>
@@ -40,8 +41,10 @@ define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b)
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
 ; CHECK-NEXT:    fcvtl v1.2d, v1.2s
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    fneg.2d v2, v2
 ; CHECK-NEXT:    bif.16b v0, v1, v2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %tmp0 = fpext <1 x float> %b to <1 x double>
   %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0)
@@ -52,8 +55,11 @@ define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b)
 ; CHECK-LABEL: test_copysign_v1f64_v1f64:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    ; kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT:    fneg.2d v2, v2
 ; CHECK-NEXT:    bif.16b v0, v1, v2
+; CHECK-NEXT:    ; kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
   %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b)
   ret <1 x double> %r

diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
index 1103cf465e5e3a..8f38bdbedc629b 100644
--- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
@@ -212,6 +212,7 @@ define <4 x double> @sitofp_v4i8_double(<4 x i8> %a) {
 define <8 x double> @sitofp_v8i8_double(<8 x i8> %a) {
 ; CHECK-LABEL: sitofp_v8i8_double:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.b[0]
 ; CHECK-NEXT:    umov w9, v0.b[2]
 ; CHECK-NEXT:    umov w11, v0.b[4]
@@ -388,6 +389,7 @@ define <4 x double> @uitofp_v4i8_double(<4 x i8> %a) {
 define <8 x double> @uitofp_v8i8_double(<8 x i8> %a) {
 ; CHECK-LABEL: uitofp_v8i8_double:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w8, v0.b[0]
 ; CHECK-NEXT:    umov w9, v0.b[2]
 ; CHECK-NEXT:    umov w11, v0.b[4]

diff --git a/llvm/test/CodeGen/AArch64/vector-llrint.ll b/llvm/test/CodeGen/AArch64/vector-llrint.ll
index dad322574965af..5503de2b4c5db9 100644
--- a/llvm/test/CodeGen/AArch64/vector-llrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-llrint.ll
@@ -17,6 +17,7 @@ declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>)
 define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) {
 ; CHECK-LABEL: llrint_v1i64_v2f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h1, v0.h[1]
 ; CHECK-NEXT:    fcvt s0, h0
 ; CHECK-NEXT:    fcvt s1, h1
@@ -35,6 +36,7 @@ declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>)
 define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) {
 ; CHECK-LABEL: llrint_v4i64_v4f16:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov h1, v0.h[2]
 ; CHECK-NEXT:    mov h2, v0.h[1]
 ; CHECK-NEXT:    mov h3, v0.h[3]
@@ -371,6 +373,7 @@ declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>)
 define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
 ; CHECK-LABEL: llrint_v1i64_v1f32:
 ; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    frintx s0, s0
 ; CHECK-NEXT:    fcvtzs x8, s0
 ; CHECK-NEXT:    fmov d0, x8

diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll
index 105fd65f409b9f..602643264e7be0 100644
--- a/llvm/test/CodeGen/AArch64/vector-lrint.ll
+++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll
@@ -2,15 +2,48 @@
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+neon |\
 ; RUN:   FileCheck %s --check-prefixes=CHECK-i32
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+neon |\
-; RUN:   FileCheck %s --check-prefixes=CHECK-i64
+; RUN:   FileCheck %s --check-prefixes=CHECK-i64,CHECK-i64-SD
 ; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=aarch64 -mattr=+neon \
 ; RUN:   -global-isel -global-isel-abort=2 2>&1 |\
-; RUN:   FileCheck %s --check-prefixes=CHECK-i32
+; RUN:   FileCheck %s --check-prefixes=CHECK-i32,CHECK-i32-GI
 ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=aarch64 -mattr=+neon \
 ; RUN:   -global-isel -global-isel-abort=2 2>&1 |\
-; RUN:   FileCheck %s --check-prefixes=CHECK-i64
+; RUN:   FileCheck %s --check-prefixes=CHECK-i64,CHECK-i64-GI
 
+; CHECK-i32-GI:       warning: Instruction selection used fallback path for lrint_v1f16
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v2f16
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f16
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f16
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f16
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f16
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v1f32
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v2f32
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f32
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f32
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f32
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f32
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v1f64
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v2f64
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f64
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f64
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f64
+; CHECK-i32-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f64
 
+; CHECK-i64-GI:       warning: Instruction selection used fallback path for lrint_v2f16
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f16
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f16
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f16
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f16
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v2f32
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f32
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f32
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f32
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f32
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v2f64
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v4f64
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v8f64
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v16f64
+; CHECK-i64-GI-NEXT:  warning: Instruction selection used fallback path for lrint_v32f64
 
 define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
 ; CHECK-i32-LABEL: lrint_v1f16:
@@ -36,6 +69,7 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half>)
 define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; CHECK-i32-LABEL: lrint_v2f16:
 ; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i32-NEXT:    mov h1, v0.h[1]
 ; CHECK-i32-NEXT:    fcvt s0, h0
 ; CHECK-i32-NEXT:    fcvt s1, h1
@@ -45,10 +79,12 @@ define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
 ; CHECK-i32-NEXT:    fcvtzs w9, s1
 ; CHECK-i32-NEXT:    fmov s0, w8
 ; CHECK-i32-NEXT:    mov v0.s[1], w9
+; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v2f16:
 ; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i64-NEXT:    mov h1, v0.h[1]
 ; CHECK-i64-NEXT:    fcvt s0, h0
 ; CHECK-i64-NEXT:    fcvt s1, h1
@@ -67,6 +103,7 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half>)
 define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ; CHECK-i32-LABEL: lrint_v4f16:
 ; CHECK-i32:       // %bb.0:
+; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i32-NEXT:    mov h1, v0.h[1]
 ; CHECK-i32-NEXT:    fcvt s2, h0
 ; CHECK-i32-NEXT:    mov h3, v0.h[2]
@@ -90,6 +127,7 @@ define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
 ;
 ; CHECK-i64-LABEL: lrint_v4f16:
 ; CHECK-i64:       // %bb.0:
+; CHECK-i64-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-i64-NEXT:    mov h1, v0.h[2]
 ; CHECK-i64-NEXT:    mov h2, v0.h[1]
 ; CHECK-i64-NEXT:    mov h3, v0.h[3]
@@ -717,12 +755,20 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
 ; CHECK-i32-NEXT:    fcvtzs v0.2s, v0.2s
 ; CHECK-i32-NEXT:    ret
 ;
-; CHECK-i64-LABEL: lrint_v1f32:
-; CHECK-i64:       // %bb.0:
-; CHECK-i64-NEXT:    frintx s0, s0
-; CHECK-i64-NEXT:    fcvtzs x8, s0
-; CHECK-i64-NEXT:    fmov d0, x8
-; CHECK-i64-NEXT:    ret
+; CHECK-i64-SD-LABEL: lrint_v1f32:
+; CHECK-i64-SD:       // %bb.0:
+; CHECK-i64-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-i64-SD-NEXT:    frintx s0, s0
+; CHECK-i64-SD-NEXT:    fcvtzs x8, s0
+; CHECK-i64-SD-NEXT:    fmov d0, x8
+; CHECK-i64-SD-NEXT:    ret
+;
+; CHECK-i64-GI-LABEL: lrint_v1f32:
+; CHECK-i64-GI:       // %bb.0:
+; CHECK-i64-GI-NEXT:    frintx s0, s0
+; CHECK-i64-GI-NEXT:    fcvtzs x8, s0
+; CHECK-i64-GI-NEXT:    fmov d0, x8
+; CHECK-i64-GI-NEXT:    ret
   %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
   ret <1 x iXLen> %a
 }
@@ -969,6 +1015,7 @@ define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
 ; CHECK-i32-NEXT:    fcvtzs w9, d1
 ; CHECK-i32-NEXT:    fmov s0, w8
 ; CHECK-i32-NEXT:    mov v0.s[1], w9
+; CHECK-i32-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-i32-NEXT:    ret
 ;
 ; CHECK-i64-LABEL: lrint_v2f64:

diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
index e453d618325225..3685e9cf85bd6e 100644
--- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll
@@ -10,9 +10,9 @@ define void @vld2(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
 ; CHECK-NEXT:  .LBB0_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld2 { v0.4s, v1.4s }, [x0], #32
-; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT:    str q0, [x1, x8]
+; CHECK-NEXT:    fmul v2.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v2.4s, v1.4s, v1.4s
+; CHECK-NEXT:    str q2, [x1, x8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    cmp x8, #1, lsl #12 // =4096
 ; CHECK-NEXT:    b.ne .LBB0_1
@@ -50,10 +50,10 @@ define void @vld3(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
 ; CHECK-NEXT:  .LBB1_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48
-; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT:    fmla v0.4s, v2.4s, v2.4s
-; CHECK-NEXT:    str q0, [x1, x8]
+; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v3.4s, v1.4s, v1.4s
+; CHECK-NEXT:    fmla v3.4s, v2.4s, v2.4s
+; CHECK-NEXT:    str q3, [x1, x8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    cmp x8, #1, lsl #12 // =4096
 ; CHECK-NEXT:    b.ne .LBB1_1
@@ -97,11 +97,11 @@ define void @vld4(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32
 ; CHECK-NEXT:    add x9, x1, x8
 ; CHECK-NEXT:    add x8, x8, #32
 ; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
-; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT:    fmul v1.4s, v2.4s, v2.4s
-; CHECK-NEXT:    fmla v1.4s, v3.4s, v3.4s
-; CHECK-NEXT:    st2 { v0.4s, v1.4s }, [x9]
+; CHECK-NEXT:    fmul v4.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v4.4s, v1.4s, v1.4s
+; CHECK-NEXT:    fmul v5.4s, v2.4s, v2.4s
+; CHECK-NEXT:    fmla v5.4s, v3.4s, v3.4s
+; CHECK-NEXT:    st2 { v4.4s, v5.4s }, [x9]
 ; CHECK-NEXT:    b.ne .LBB2_1
 ; CHECK-NEXT:  // %bb.2: // %while.end
 ; CHECK-NEXT:    ret
@@ -149,9 +149,9 @@ define void @twosrc(ptr nocapture readonly %pSrc, ptr nocapture readonly %pSrc2,
 ; CHECK-NEXT:    ld2 { v0.4s, v1.4s }, [x9]
 ; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
 ; CHECK-NEXT:    ld2 { v2.4s, v3.4s }, [x10]
-; CHECK-NEXT:    fmul v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    fmla v0.4s, v1.4s, v3.4s
-; CHECK-NEXT:    str q0, [x2], #16
+; CHECK-NEXT:    fmul v4.4s, v2.4s, v0.4s
+; CHECK-NEXT:    fmla v4.4s, v1.4s, v3.4s
+; CHECK-NEXT:    str q4, [x2], #16
 ; CHECK-NEXT:    b.ne .LBB3_1
 ; CHECK-NEXT:  // %bb.2: // %while.end
 ; CHECK-NEXT:    ret
@@ -190,9 +190,9 @@ define void @vld2_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
 ; CHECK-NEXT:  .LBB4_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld2 { v0.4s, v1.4s }, [x0], #32
-; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT:    str q0, [x1, x8]
+; CHECK-NEXT:    fmul v2.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v2.4s, v1.4s, v1.4s
+; CHECK-NEXT:    str q2, [x1, x8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    cmp x8, #1, lsl #12 // =4096
 ; CHECK-NEXT:    b.ne .LBB4_1
@@ -229,10 +229,10 @@ define void @vld3_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
 ; CHECK-NEXT:  .LBB5_1: // %vector.body
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48
-; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT:    fmla v0.4s, v2.4s, v2.4s
-; CHECK-NEXT:    str q0, [x1, x8]
+; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v3.4s, v1.4s, v1.4s
+; CHECK-NEXT:    fmla v3.4s, v2.4s, v2.4s
+; CHECK-NEXT:    str q3, [x1, x8]
 ; CHECK-NEXT:    add x8, x8, #16
 ; CHECK-NEXT:    cmp x8, #1, lsl #12 // =4096
 ; CHECK-NEXT:    b.ne .LBB5_1
@@ -274,11 +274,11 @@ define void @vld4_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture %
 ; CHECK-NEXT:    add x9, x1, x8
 ; CHECK-NEXT:    add x8, x8, #32
 ; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
-; CHECK-NEXT:    fmul v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    fmla v0.4s, v1.4s, v1.4s
-; CHECK-NEXT:    fmul v1.4s, v2.4s, v2.4s
-; CHECK-NEXT:    fmla v1.4s, v3.4s, v3.4s
-; CHECK-NEXT:    st2 { v0.4s, v1.4s }, [x9]
+; CHECK-NEXT:    fmul v4.4s, v0.4s, v0.4s
+; CHECK-NEXT:    fmla v4.4s, v1.4s, v1.4s
+; CHECK-NEXT:    fmul v5.4s, v2.4s, v2.4s
+; CHECK-NEXT:    fmla v5.4s, v3.4s, v3.4s
+; CHECK-NEXT:    st2 { v4.4s, v5.4s }, [x9]
 ; CHECK-NEXT:    b.ne .LBB6_1
 ; CHECK-NEXT:  // %bb.2: // %while.end
 ; CHECK-NEXT:    ret
@@ -369,16 +369,16 @@ define void @transpose_s16_8x8_simpler2(ptr nocapture noundef %a) {
 ; CHECK:       .Lfunc_begin8:
 ; CHECK-NEXT:    .cfi_startproc
 ; CHECK-NEXT:  // %bb.0: // %entry
-; CHECK-NEXT:    ldp q0, q1, [x0]
-; CHECK-NEXT:    ldp q2, q3, [x0, #64]
-; CHECK-NEXT:    ldp q4, q5, [x0, #32]
-; CHECK-NEXT:    ldp q6, q7, [x0, #96]
-; CHECK-NEXT:    mov v0.h[5], v1.h[4]
-; CHECK-NEXT:    zip1 v1.8h, v2.8h, v3.8h
-; CHECK-NEXT:    zip1 v2.8h, v4.8h, v5.8h
-; CHECK-NEXT:    mov v6.h[5], v7.h[4]
-; CHECK-NEXT:    mov v0.s[1], v1.s[0]
-; CHECK-NEXT:    uzp1 v1.4s, v2.4s, v6.4s
+; CHECK-NEXT:    ldp q0, q2, [x0]
+; CHECK-NEXT:    ldp q3, q4, [x0, #64]
+; CHECK-NEXT:    ldp q5, q6, [x0, #32]
+; CHECK-NEXT:    ldp q7, q16, [x0, #96]
+; CHECK-NEXT:    mov v0.h[5], v2.h[4]
+; CHECK-NEXT:    zip1 v2.8h, v3.8h, v4.8h
+; CHECK-NEXT:    zip1 v3.8h, v5.8h, v6.8h
+; CHECK-NEXT:    mov v7.h[5], v16.h[4]
+; CHECK-NEXT:    mov v0.s[1], v2.s[0]
+; CHECK-NEXT:    uzp1 v1.4s, v3.4s, v7.4s
 ; CHECK-NEXT:    zip2 v2.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    st2 { v0.2s, v1.2s }, [x0]
 ; CHECK-NEXT:    str q2, [x0, #64]
@@ -424,23 +424,23 @@ define void @transpose_s16_8x8(ptr nocapture noundef %0, ptr nocapture noundef %
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ldr q0, [x0]
 ; CHECK-NEXT:    ldr q1, [x1]
-; CHECK-NEXT:    ldr q2, [x2]
 ; CHECK-NEXT:    ldr q3, [x4]
 ; CHECK-NEXT:    ldr q4, [x5]
+; CHECK-NEXT:    ldr q2, [x2]
 ; CHECK-NEXT:    ldr q5, [x3]
 ; CHECK-NEXT:    trn1 v16.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    trn2 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT:    ldr q6, [x6]
 ; CHECK-NEXT:    ldr q7, [x7]
 ; CHECK-NEXT:    trn1 v17.8h, v3.8h, v4.8h
-; CHECK-NEXT:    trn1 v18.8h, v2.8h, v5.8h
 ; CHECK-NEXT:    trn2 v1.8h, v3.8h, v4.8h
+; CHECK-NEXT:    trn1 v18.8h, v2.8h, v5.8h
 ; CHECK-NEXT:    trn2 v2.8h, v2.8h, v5.8h
 ; CHECK-NEXT:    trn1 v19.8h, v6.8h, v7.8h
 ; CHECK-NEXT:    trn2 v3.8h, v6.8h, v7.8h
 ; CHECK-NEXT:    trn1 v4.4s, v16.4s, v17.4s
-; CHECK-NEXT:    trn2 v16.4s, v16.4s, v17.4s
 ; CHECK-NEXT:    trn1 v6.4s, v0.4s, v1.4s
+; CHECK-NEXT:    trn2 v16.4s, v16.4s, v17.4s
 ; CHECK-NEXT:    trn2 v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    trn1 v5.4s, v18.4s, v19.4s
 ; CHECK-NEXT:    trn1 v7.4s, v2.4s, v3.4s
@@ -668,11 +668,11 @@ define void @store_factor3(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2
 ; CHECK-NEXT:    .cfi_startproc
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ext v3.16b, v0.16b, v1.16b, #12
-; CHECK-NEXT:    ext v4.16b, v1.16b, v2.16b, #12
+; CHECK-NEXT:    ext v6.16b, v1.16b, v2.16b, #12
 ; CHECK-NEXT:    zip2 v3.4s, v0.4s, v3.4s
-; CHECK-NEXT:    zip2 v4.4s, v1.4s, v4.4s
 ; CHECK-NEXT:    mov v3.s[0], v0.s[0]
 ; CHECK-NEXT:    ext v0.16b, v2.16b, v0.16b, #12
+; CHECK-NEXT:    zip2 v4.4s, v1.4s, v6.4s
 ; CHECK-NEXT:    mov v4.s[0], v1.s[0]
 ; CHECK-NEXT:    zip2 v5.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    mov v5.s[0], v2.s[0]

diff --git a/llvm/test/CodeGen/AArch64/win64-fpowi.ll b/llvm/test/CodeGen/AArch64/win64-fpowi.ll
index 8ab7851061f481..3eb74f8394ec4e 100644
--- a/llvm/test/CodeGen/AArch64/win64-fpowi.ll
+++ b/llvm/test/CodeGen/AArch64/win64-fpowi.ll
@@ -62,10 +62,13 @@ define <2 x double> @powi_v2f64(<2 x double> %a, i32 %b) {
 ; CHECK-NEXT:    fmov d1, d8
 ; CHECK-NEXT:    bl pow
 ; CHECK-NEXT:    fmov d1, d8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    bl pow
 ; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-NEXT:    .seh_startepilogue
 ; CHECK-NEXT:    ldr d8, [sp, #40] // 8-byte Folded Reload
@@ -95,16 +98,21 @@ define <2 x float> @powi_v2f32(<2 x float> %a, i32 %b) {
 ; CHECK-NEXT:    .seh_save_freg d8, 40
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    scvtf s8, w0
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    fmov s1, s8
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    fmov s1, s8
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl powf
 ; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    .seh_startepilogue
 ; CHECK-NEXT:    ldr d8, [sp, #40] // 8-byte Folded Reload
 ; CHECK-NEXT:    .seh_save_freg d8, 40

diff --git a/llvm/test/CodeGen/AArch64/win64_vararg.ll b/llvm/test/CodeGen/AArch64/win64_vararg.ll
index 864735eab6b4ae..aaf4cad6087403 100644
--- a/llvm/test/CodeGen/AArch64/win64_vararg.ll
+++ b/llvm/test/CodeGen/AArch64/win64_vararg.ll
@@ -178,6 +178,7 @@ define void @vla(i32, ptr, ...) local_unnamed_addr {
 ; CHECK-NEXT:    add x29, sp, #40
 ; CHECK-NEXT:    .seh_add_fp 40
 ; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    add x8, x29, #24
 ; CHECK-NEXT:    mov w9, w0
 ; CHECK-NEXT:    mov x19, x1

diff --git a/llvm/test/CodeGen/AArch64/xtn.ll b/llvm/test/CodeGen/AArch64/xtn.ll
index 7d1fa47e32da1e..8a4d6b8c7b789f 100644
--- a/llvm/test/CodeGen/AArch64/xtn.ll
+++ b/llvm/test/CodeGen/AArch64/xtn.ll
@@ -23,6 +23,7 @@ entry:
 define i8 @xtn_i64_to_i8(i64 %a) {
 ; CHECK-LABEL: xtn_i64_to_i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i64 %a to i8
@@ -32,6 +33,7 @@ entry:
 define i8 @xtn_i128_to_i8(i128 %a) {
 ; CHECK-LABEL: xtn_i128_to_i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i128 %a to i8
@@ -50,6 +52,7 @@ entry:
 define i16 @xtn_i64_to_i16(i64 %a) {
 ; CHECK-LABEL: xtn_i64_to_i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i64 %a to i16
@@ -59,6 +62,7 @@ entry:
 define i16 @xtn_i128_to_i16(i128 %a) {
 ; CHECK-LABEL: xtn_i128_to_i16:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i128 %a to i16
@@ -68,6 +72,7 @@ entry:
 define i32 @xtn_i64_to_i32(i64 %a) {
 ; CHECK-LABEL: xtn_i64_to_i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i64 %a to i32
@@ -77,6 +82,7 @@ entry:
 define i32 @xtn_i128_to_i32(i128 %a) {
 ; CHECK-LABEL: xtn_i128_to_i32:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
 entry:
   %arg1 = trunc i128 %a to i32
@@ -125,12 +131,14 @@ define <2 x i8> @xtn_v2i128_v2i8(<2 x i128> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fmov s0, w0
 ; CHECK-SD-NEXT:    mov v0.s[1], w2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: xtn_v2i128_v2i8:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v0.s[0], w0
 ; CHECK-GI-NEXT:    mov v0.s[1], w2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = trunc <2 x i128> %a to <2 x i8>
@@ -161,12 +169,14 @@ define <2 x i16> @xtn_v2i128_v2i16(<2 x i128> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fmov s0, w0
 ; CHECK-SD-NEXT:    mov v0.s[1], w2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: xtn_v2i128_v2i16:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v0.s[0], w0
 ; CHECK-GI-NEXT:    mov v0.s[1], w2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = trunc <2 x i128> %a to <2 x i16>
@@ -188,12 +198,14 @@ define <2 x i32> @xtn_v2i128_v2i32(<2 x i128> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fmov s0, w0
 ; CHECK-SD-NEXT:    mov v0.s[1], w2
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: xtn_v2i128_v2i32:
 ; CHECK-GI:       // %bb.0: // %entry
 ; CHECK-GI-NEXT:    mov v0.s[0], w0
 ; CHECK-GI-NEXT:    mov v0.s[1], w2
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = trunc <2 x i128> %a to <2 x i32>
@@ -220,6 +232,7 @@ entry:
 define <3 x i8> @xtn_v3i16_v3i8(<3 x i16> %a) {
 ; CHECK-LABEL: xtn_v3i16_v3i8:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT:    umov w0, v0.h[0]
 ; CHECK-NEXT:    umov w1, v0.h[1]
 ; CHECK-NEXT:    umov w2, v0.h[2]
@@ -254,6 +267,9 @@ entry:
 define <3 x i8> @xtn_v3i64_v3i8(<3 x i64> %a) {
 ; CHECK-SD-LABEL: xtn_v3i64_v3i8:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    xtn v1.2s, v2.2d
 ; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
@@ -267,6 +283,9 @@ define <3 x i8> @xtn_v3i64_v3i8(<3 x i64> %a) {
 ; CHECK-GI-NEXT:    fmov x0, d0
 ; CHECK-GI-NEXT:    fmov x1, d1
 ; CHECK-GI-NEXT:    fmov x2, d2
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 killed $x1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 killed $x2
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = trunc <3 x i64> %a to <3 x i8>
@@ -286,6 +305,9 @@ entry:
 define <3 x i16> @xtn_v3i64_v3i16(<3 x i64> %a) {
 ; CHECK-SD-LABEL: xtn_v3i64_v3i16:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
 ; CHECK-SD-NEXT:    xtn v0.4h, v0.4s
@@ -299,6 +321,7 @@ define <3 x i16> @xtn_v3i64_v3i16(<3 x i64> %a) {
 ; CHECK-GI-NEXT:    fmov x8, d2
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %arg1 = trunc <3 x i64> %a to <3 x i16>
@@ -308,6 +331,9 @@ entry:
 define <3 x i32> @xtn_v3i64_v3i32(<3 x i64> %a) {
 ; CHECK-SD-LABEL: xtn_v3i64_v3i32:
 ; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
 ; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
 ; CHECK-SD-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
 ; CHECK-SD-NEXT:    ret

diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll
index 839214a7ccd984..0d5010113ce0b2 100644
--- a/llvm/test/CodeGen/AArch64/zext.ll
+++ b/llvm/test/CodeGen/AArch64/zext.ll
@@ -27,6 +27,7 @@ entry:
 define i64 @zext_i8_to_i64(i8 %a) {
 ; CHECK-LABEL: zext_i8_to_i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x0, x0, #0xff
 ; CHECK-NEXT:    ret
 entry:
@@ -57,6 +58,7 @@ entry:
 define i64 @zext_i16_to_i64(i16 %a) {
 ; CHECK-LABEL: zext_i16_to_i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x0, x0, #0xffff
 ; CHECK-NEXT:    ret
 entry:
@@ -97,6 +99,7 @@ entry:
 define i64 @zext_i10_to_i64(i10 %a) {
 ; CHECK-LABEL: zext_i10_to_i64:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT:    and x0, x0, #0x3ff
 ; CHECK-NEXT:    ret
 entry:
@@ -234,6 +237,7 @@ define <3 x i16> @zext_v3i8_v3i16(<3 x i8> %a) {
 ; CHECK-SD-NEXT:    mov v0.h[1], w1
 ; CHECK-SD-NEXT:    mov v0.h[2], w2
 ; CHECK-SD-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i8_v3i16:
@@ -244,6 +248,7 @@ define <3 x i16> @zext_v3i8_v3i16(<3 x i8> %a) {
 ; CHECK-GI-NEXT:    and w8, w2, #0xff
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <3 x i8> %a to <3 x i16>
@@ -288,10 +293,16 @@ define <3 x i64> @zext_v3i8_v3i64(<3 x i8> %a) {
 ; CHECK-SD-NEXT:    ushll v1.2d, v3.2s, #0
 ; CHECK-SD-NEXT:    mov v2.b[0], v1.b[0]
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i8_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-GI-NEXT:    and x8, x0, #0xff
 ; CHECK-GI-NEXT:    and x9, x1, #0xff
 ; CHECK-GI-NEXT:    and x10, x2, #0xff
@@ -312,6 +323,7 @@ define <3 x i32> @zext_v3i16_v3i32(<3 x i16> %a) {
 ;
 ; CHECK-GI-LABEL: zext_v3i16_v3i32:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    mov v1.s[0], w8
@@ -331,11 +343,15 @@ define <3 x i64> @zext_v3i16_v3i64(<3 x i16> %a) {
 ; CHECK-SD-NEXT:    ushll v2.4s, v0.4h, #0
 ; CHECK-SD-NEXT:    ushll v0.2d, v2.2s, #0
 ; CHECK-SD-NEXT:    ushll2 v2.2d, v2.4s, #0
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i16_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT:    umov w8, v0.h[0]
 ; CHECK-GI-NEXT:    umov w9, v0.h[1]
 ; CHECK-GI-NEXT:    umov w10, v0.h[2]
@@ -353,8 +369,10 @@ define <3 x i64> @zext_v3i32_v3i64(<3 x i32> %a) {
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    ushll v3.2d, v0.2s, #0
 ; CHECK-SD-NEXT:    ushll2 v2.2d, v0.4s, #0
-; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 killed $q2
 ; CHECK-SD-NEXT:    fmov d0, d3
+; CHECK-SD-NEXT:    ext v1.16b, v3.16b, v3.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i32_v3i64:
@@ -378,6 +396,7 @@ define <3 x i16> @zext_v3i10_v3i16(<3 x i10> %a) {
 ; CHECK-SD-NEXT:    mov v0.h[1], w1
 ; CHECK-SD-NEXT:    mov v0.h[2], w2
 ; CHECK-SD-NEXT:    bic v0.4h, #252, lsl #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i10_v3i16:
@@ -388,6 +407,7 @@ define <3 x i16> @zext_v3i10_v3i16(<3 x i10> %a) {
 ; CHECK-GI-NEXT:    and w8, w2, #0x3ff
 ; CHECK-GI-NEXT:    mov v0.h[1], w9
 ; CHECK-GI-NEXT:    mov v0.h[2], w8
+; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-GI-NEXT:    ret
 entry:
   %c = zext <3 x i10> %a to <3 x i16>
@@ -423,8 +443,8 @@ define <3 x i64> @zext_v3i10_v3i64(<3 x i10> %a) {
 ; CHECK-SD-LABEL: zext_v3i10_v3i64:
 ; CHECK-SD:       // %bb.0: // %entry
 ; CHECK-SD-NEXT:    fmov s0, w0
-; CHECK-SD-NEXT:    mov w8, #1023 // =0x3ff
 ; CHECK-SD-NEXT:    fmov s1, w2
+; CHECK-SD-NEXT:    mov w8, #1023 // =0x3ff
 ; CHECK-SD-NEXT:    dup v2.2d, x8
 ; CHECK-SD-NEXT:    mov v0.s[1], w1
 ; CHECK-SD-NEXT:    ushll v3.2d, v1.2s, #0
@@ -432,10 +452,15 @@ define <3 x i64> @zext_v3i10_v3i64(<3 x i10> %a) {
 ; CHECK-SD-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-SD-NEXT:    and v2.8b, v3.8b, v2.8b
 ; CHECK-SD-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 killed $q1
 ; CHECK-SD-NEXT:    ret
 ;
 ; CHECK-GI-LABEL: zext_v3i10_v3i64:
 ; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-GI-NEXT:    // kill: def $w1 killed $w1 def $x1
+; CHECK-GI-NEXT:    // kill: def $w2 killed $w2 def $x2
 ; CHECK-GI-NEXT:    and x8, x0, #0x3ff
 ; CHECK-GI-NEXT:    and x9, x1, #0x3ff
 ; CHECK-GI-NEXT:    and x10, x2, #0x3ff

